upstream/ipython Commit - r18119:8da95fea

stop using deprecated DelayedCallback...

MinRK -

r18119:8da95fea

parent child

IPython/html/services/clusters/clustermanager.py

0 +8 -7

              """Manage IPython.parallel clusters in the notebook.
              Authors:
              * Brian Granger
              """
              #-----------------------------------------------------------------------------
              #  Copyright (C) 2008-2011  The IPython Development Team
              #
              #  Distributed under the terms of the BSD License.  The full license is in
              #  the file COPYING, distributed as part of this software.
              #-----------------------------------------------------------------------------
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
              from tornado import web
-             from zmq.eventloop import ioloop
              from IPython.config.configurable import LoggingConfigurable
-             from IPython.utils.traitlets import Dict, Instance, CFloat
+             from IPython.utils.traitlets import Dict, Instance, Float
              from IPython.core.profileapp import list_profiles_in
              from IPython.core.profiledir import ProfileDir
              from IPython.utils import py3compat
              from IPython.utils.path import get_ipython_dir
              #-----------------------------------------------------------------------------
              # Classes
              #-----------------------------------------------------------------------------
              class ClusterManager(LoggingConfigurable):
                  profiles = Dict()
-                 delay = CFloat(1., config=True,
+                 delay = Float(1., config=True,
                      help="delay (in s) between starting the controller and the engines")
                  loop = Instance('zmq.eventloop.ioloop.IOLoop')
                  def _loop_default(self):
                      """Supply the default value of the ``loop`` trait: pyzmq's IOLoop instance.

                      The import is done here rather than at module level, so the zmq
                      event loop module is only imported when this default is requested.
                      """
                      from zmq.eventloop import ioloop
                      return ioloop.IOLoop.instance()
                  def build_launchers(self, profile_dir):
                      """Build configured launchers for the profile stored in ``profile_dir``.

                      An ``IPClusterStart`` subclass with its signal and logging setup
                      disabled is initialized with ``--profile-dir``; the launchers and the
                      engine count are then read off that application object.

                      Returns
                      -------
                      tuple
                          ``(controller_launcher, engine_launcher, n)`` taken from the
                          initialized application.
                      """
                      from IPython.parallel.apps.ipclusterapp import IPClusterStart

                      class DummyIPClusterStart(IPClusterStart):
                          """Dummy subclass to skip init steps that conflict with global app.

                          Instantiating and initializing this class should result in fully
                          configured launchers, but no other side effects or state.
                          """
                          def init_signal(self):
                              return None

                          def reinit_logging(self):
                              return None

                      app = DummyIPClusterStart(log=self.log)
                      app.initialize(['--profile-dir', profile_dir])
                      return app.controller_launcher, app.engine_launcher, app.n
                  def get_profile_dir(self, name, path):
                      """Return the ``location`` of the profile called ``name`` looked up under ``path``."""
                      return ProfileDir.find_profile_dir_by_name(path, name=name).location
                  def update_profiles(self):
                      """Register profiles found in the ipython_dir and the cwd.

                      Profiles not yet present in ``self.profiles`` are added with status
                      'stopped'; entries that already exist are left untouched.
                      """
                      search_paths = (get_ipython_dir(), py3compat.getcwd())
                      for base in search_paths:
                          for name in list_profiles_in(base):
                              location = self.get_profile_dir(name, base)
                              if name in self.profiles:
                                  continue
                              self.log.debug("Adding cluster profile '%s'" % name)
                              self.profiles[name] = dict(
                                  profile=name,
                                  profile_dir=location,
                                  status='stopped',
                              )
                  def list_profiles(self):
                      """Return the info dicts of all known profiles, 'default' first.

                      The profile table is refreshed via ``update_profiles`` before the
                      list is built; the remaining profiles follow in sorted name order.
                      """
                      self.update_profiles()

                      def sort_key(name):
                          # '' sorts ahead of every non-empty name, which puts 'default' first
                          return '' if name == 'default' else name

                      ordered = sorted(self.profiles, key=sort_key)
                      return [self.profile_info(name) for name in ordered]
                  def check_profile(self, profile):
                      """Raise a 404 ``HTTPError`` unless ``profile`` is a key of ``self.profiles``."""
                      if profile in self.profiles:
                          return
                      raise web.HTTPError(404, u'profile not found')
                  def profile_info(self, profile):
                      """Return the public info dict for ``profile``.

                      The result always carries 'profile', 'profile_dir' and 'status';
                      'n' is included only when the stored record has one. The profile is
                      validated with ``check_profile`` first.
                      """
                      self.check_profile(profile)
                      record = self.profiles.get(profile)
                      info = {
                          'profile': profile,
                          'profile_dir': record['profile_dir'],
                          'status': record['status'],
                      }
                      if 'n' in record:
                          info['n'] = record['n']
                      return info
                  def start_cluster(self, profile, n=None):
                      """Start a cluster for a given profile."""
                      self.check_profile(profile)
                      data = self.profiles[profile]
                      if data['status'] == 'running':
                          raise web.HTTPError(409, u'cluster already running')
                      cl, esl, default_n = self.build_launchers(data['profile_dir'])
                      n = n if n is not None else default_n
                      def clean_data():
                          data.pop('controller_launcher',None)
                          data.pop('engine_set_launcher',None)
                          data.pop('n',None)
                          data['status'] = 'stopped'
                      def engines_stopped(r):
                          self.log.debug('Engines stopped')
                          if cl.running:
                              cl.stop()
                          clean_data()
                      esl.on_stop(engines_stopped)
                      def controller_stopped(r):
                          self.log.debug('Controller stopped')
                          if esl.running:
                              esl.stop()
                          clean_data()
                      cl.on_stop(controller_stopped)
+                     loop = self.loop
-                     dc = ioloop.DelayedCallback(lambda: cl.start(), 0, self.loop)
-                     dc.start()
-                     dc = ioloop.DelayedCallback(lambda: esl.start(n), 1000*self.delay, self.loop)
-                     dc.start()
+                     def start():
+                         """start the controller, then the engines after a delay"""
+                         cl.start()
+                         loop.add_timeout(self.loop.time() + self.delay, lambda : esl.start(n))
+                     self.loop.add_callback(start)
                      self.log.debug('Cluster started')
                      data['controller_launcher'] = cl
                      data['engine_set_launcher'] = esl
                      data['n'] = n
                      data['status'] = 'running'
                      return self.profile_info(profile)
                  def stop_cluster(self, profile):
                      """Stop the cluster of ``profile`` and return a provisional info dict.

                      Raises a 404 ``HTTPError`` (via ``check_profile``) for an unknown
                      profile and a 409 ``HTTPError`` when the profile's status is already
                      'stopped'. The controller launcher is stopped before the engine set
                      launcher; each is only stopped if it reports ``running``.
                      """
                      self.check_profile(profile)
                      record = self.profiles[profile]
                      if record['status'] == 'stopped':
                          raise web.HTTPError(409, u'cluster not running')
                      controller = record['controller_launcher']
                      engines = record['engine_set_launcher']
                      for launcher in (controller, engines):
                          if launcher.running:
                              launcher.stop()
                      # Only a temporary info dict is returned here; the stored record
                      # is updated by the on_stop callbacks registered in start_cluster.
                      return dict(
                          profile=record['profile'],
                          profile_dir=record['profile_dir'],
                          status='stopped',
                      )
                  def stop_all_clusters(self):
                      """Stop every cluster whose status is 'running'.

                      Profiles that are not running are skipped: ``stop_cluster`` raises a
                      409 ``HTTPError`` for a stopped profile, which would otherwise abort
                      the loop before the remaining clusters had been stopped.
                      """
                      # Iterate over a snapshot so that anything triggered by stopping a
                      # cluster cannot disturb the iteration over the profile table.
                      for name, record in list(self.profiles.items()):
                          if record['status'] == 'running':
                              self.stop_cluster(name)

IPython/parallel/apps/baseapp.py

0 0 -18

              # encoding: utf-8
              """
              The Base Application class for IPython.parallel apps
-             Authors:
-             * Brian Granger
-             * Min RK
              """
-             #-----------------------------------------------------------------------------
-             #  Copyright (C) 2008-2011  The IPython Development Team
+             #
-             #  Distributed under the terms of the BSD License.  The full license is in
-             #  the file COPYING, distributed as part of this software.
-             #-----------------------------------------------------------------------------
-             #-----------------------------------------------------------------------------
-             # Imports
-             #-----------------------------------------------------------------------------
              import os
              import logging
              import re
              import sys
-             from subprocess import Popen, PIPE
              from IPython.config.application import catch_config_error, LevelFormatter
              from IPython.core import release
              from IPython.core.crashhandler import CrashHandler
              from IPython.core.application import (
                  BaseIPythonApplication,
                  base_aliases as base_ip_aliases,
                  base_flags as base_ip_flags
              )
              from IPython.utils.path import expand_path
              from IPython.utils.process import check_pid
              from IPython.utils import py3compat
              from IPython.utils.py3compat import unicode_type
              from IPython.utils.traitlets import Unicode, Bool, Instance, Dict
              #-----------------------------------------------------------------------------
              # Module errors
              #-----------------------------------------------------------------------------
              class PIDFileError(Exception):
                  """Error raised for a missing, invalid or already existing pid file."""
              #-----------------------------------------------------------------------------
              # Crash handler for this application
              #-----------------------------------------------------------------------------
              class ParallelCrashHandler(CrashHandler):
                  """Crash handler for the parallel apps.

                  Passes the contact name and email taken from ``IPython.core.release``
                  and the GitHub issue tracker URL on to ``CrashHandler``.
                  """

                  def __init__(self, app):
                      super(ParallelCrashHandler, self).__init__(
                          app,
                          release.authors['Min'][0],
                          release.author_email,
                          'https://github.com/ipython/ipython/issues',
                      )
              #-----------------------------------------------------------------------------
              # Main application
              #-----------------------------------------------------------------------------
              # Aliases: a copy of the core application's aliases, extended with the
              # options that BaseParallelApplication adds.
              base_aliases = dict(base_ip_aliases)
              base_aliases.update({
                  'work-dir': 'BaseParallelApplication.work_dir',
                  'log-to-file': 'BaseParallelApplication.log_to_file',
                  'clean-logs': 'BaseParallelApplication.clean_logs',
                  'log-url': 'BaseParallelApplication.log_url',
                  'cluster-id': 'BaseParallelApplication.cluster_id',
              })
              # Flags: the core flags are merged in last, so a core flag with the same
              # name would replace the entry defined here.
              base_flags = {
                  'log-to-file': (
                      {'BaseParallelApplication': {'log_to_file': True}},
                      "send log output to a file",
                  ),
              }
              base_flags.update(base_ip_flags)
              class BaseParallelApplication(BaseIPythonApplication):
                  """The base Application for IPython.parallel apps

                  Principal extensions to BaseIPythonApplication:

                  * work_dir
                  * remote logging via pyzmq
                  * IOLoop instance
                  """
                  crash_handler_class = ParallelCrashHandler

                  def _log_level_default(self):
                      # temporarily override default_log_level to INFO
                      return logging.INFO

                  def _log_format_default(self):
                      """override default log format to include time"""
                      return u"%(asctime)s.%(msecs).03d [%(name)s]%(highlevel)s %(message)s"

                  work_dir = Unicode(py3compat.getcwd(), config=True,
                      help='Set the working dir for the process.'
                  )
                  def _work_dir_changed(self, name, old, new):
                      # store the assigned value as an expanded unicode path
                      self.work_dir = unicode_type(expand_path(new))

                  log_to_file = Bool(config=True,
                      help="whether to log to a file")

                  clean_logs = Bool(False, config=True,
                      help="whether to cleanup old logfiles before starting")

                  log_url = Unicode('', config=True,
                      help="The ZMQ URL of the iplogger to aggregate logging.")

                  cluster_id = Unicode('', config=True,
                      help="""String id to add to runtime files, to prevent name collisions when
                      using multiple clusters with a single profile simultaneously.
                      When set, files will be named like: 'ipcontroller-<cluster_id>-engine.json'
                      Since this is text inserted into filenames, typical recommendations apply:
                      Simple character strings are ideal, and spaces are not recommended (but should
                      generally work).
                      """
                  )
                  def _cluster_id_changed(self, name, old, new):
                      # start again from the class-level name so that repeated changes
                      # do not accumulate suffixes
                      self.name = self.__class__.name
                      if new:
                          self.name += '-%s'%new

                  def _config_files_default(self):
                      return ['ipcontroller_config.py', 'ipengine_config.py', 'ipcluster_config.py']

                  loop = Instance('zmq.eventloop.ioloop.IOLoop')
                  def _loop_default(self):
                      from zmq.eventloop.ioloop import IOLoop
                      return IOLoop.instance()

                  aliases = Dict(base_aliases)
                  flags = Dict(base_flags)

                  @catch_config_error
                  def initialize(self, argv=None):
                      """initialize the app"""
                      super(BaseParallelApplication, self).initialize(argv)
                      self.to_work_dir()
                      self.reinit_logging()

                  def to_work_dir(self):
                      """Change into ``work_dir`` if needed and put '' at the front of sys.path."""
                      wd = self.work_dir
                      if unicode_type(wd) != py3compat.getcwd():
                          os.chdir(wd)
                          self.log.info("Changing to working dir: %s" % wd)
                      # This is the working dir by now.
                      sys.path.insert(0, '')

                  def reinit_logging(self):
                      """Set up the log handler and formatter for this app.

                      Optionally removes old log files for this app name first
                      (``clean_logs``). With ``log_to_file`` set, all existing handlers are
                      replaced by one writing to ``<name>-<pid>.log`` in the profile's log
                      dir; otherwise the first existing handler is reused. The
                      timestamped formatter is installed on that handler and propagation
                      to the root logger is turned off.
                      """
                      # Remove old log files
                      log_dir = self.profile_dir.log_dir
                      if self.clean_logs:
                          for f in os.listdir(log_dir):
                              if re.match(r'%s-\d+\.(log|err|out)' % self.name, f):
                                  try:
                                      os.remove(os.path.join(log_dir, f))
                                  except (OSError, IOError):
                                      # probably just conflict from sibling process
                                      # already removing it
                                      pass
                      if self.log_to_file:
                          # Start logging to the new log file
                          log_filename = self.name + u'-' + str(os.getpid()) + u'.log'
                          logfile = os.path.join(log_dir, log_filename)
                          # the file object is handed to the StreamHandler below, so it
                          # is deliberately left open
                          open_log_file = open(logfile, 'w')
                      else:
                          open_log_file = None
                      if open_log_file is not None:
                          while self.log.handlers:
                              self.log.removeHandler(self.log.handlers[0])
                          self._log_handler = logging.StreamHandler(open_log_file)
                          self.log.addHandler(self._log_handler)
                      else:
                          self._log_handler = self.log.handlers[0]
                      # Add timestamps to log format:
                      self._log_formatter = LevelFormatter(self.log_format,
                                                              datefmt=self.log_datefmt)
                      self._log_handler.setFormatter(self._log_formatter)
                      # do not propagate log messages to root logger
                      # ipcluster app will sometimes print duplicate messages during shutdown
                      # if this is 1 (default):
                      self.log.propagate = False

                  def write_pid_file(self, overwrite=False):
                      """Create a .pid file in the profile's pid_dir containing this process's pid.

                      Parameters
                      ----------
                      overwrite : bool
                          If False (the default), an existing pid file is an error. If True,
                          an existing file is replaced without being read, so an
                          unparseable leftover file cannot make the overwrite fail.

                      Raises
                      ------
                      PIDFileError
                          If the pid file already exists and ``overwrite`` is False.
                      """
                      pid_file = os.path.join(self.profile_dir.pid_dir, self.name + u'.pid')
                      if os.path.isfile(pid_file) and not overwrite:
                          # only needed for the error message; may itself raise
                          # PIDFileError if the existing file is invalid
                          pid = self.get_pid_from_file()
                          raise PIDFileError(
                              'The pid file [%s] already exists. \nThis could mean that this '
                              'server is already running with [pid=%s].' % (pid_file, pid)
                          )
                      with open(pid_file, 'w') as f:
                          self.log.info("Creating pid file: %s" % pid_file)
                          f.write(repr(os.getpid())+'\n')

                  def remove_pid_file(self):
                      """Remove the pid file, if it exists.

                      This is best-effort: a failure while removing the file is logged as
                      a warning instead of being raised. Returns ``None``.
                      """
                      pid_file = os.path.join(self.profile_dir.pid_dir, self.name + u'.pid')
                      if os.path.isfile(pid_file):
                          try:
                              self.log.info("Removing pid file: %s" % pid_file)
                              os.remove(pid_file)
                          except Exception:
                              # Exception rather than a bare except, so that
                              # KeyboardInterrupt and SystemExit are not swallowed
                              self.log.warn("Error removing the pid file: %s" % pid_file)

                  def get_pid_from_file(self):
                      """Get the pid from the pid file.

                      Raises
                      ------
                      PIDFileError
                          If the pid file doesn't exist, or if its contents are not an
                          integer.
                      """
                      pid_file = os.path.join(self.profile_dir.pid_dir, self.name + u'.pid')
                      if os.path.isfile(pid_file):
                          with open(pid_file, 'r') as f:
                              s = f.read().strip()
                              try:
                                  pid = int(s)
                              except ValueError:
                                  raise PIDFileError("invalid pid file: %s (contents: %r)"%(pid_file, s))
                              return pid
                      else:
                          raise PIDFileError('pid file not found: %s' % pid_file)

                  def check_pid(self, pid):
                      """Return the result of the module-level ``check_pid`` for ``pid``.

                      If that check itself raises, a warning is logged and the process is
                      assumed to be running (``True`` is returned).
                      """
                      try:
                          return check_pid(pid)
                      except Exception:
                          self.log.warn(
                              "Could not determine whether pid %i is running. "
                              " Making the likely assumption that it is."%pid
                          )
                          return True

IPython/parallel/apps/ipclusterapp.py

0 +9 -31

              #!/usr/bin/env python
              # encoding: utf-8
-             """
-             The ipcluster application.
-             Authors:
-             * Brian Granger
-             * MinRK
-             """
+             """The ipcluster application."""
              from __future__ import print_function
-             #-----------------------------------------------------------------------------
-             #  Copyright (C) 2008-2011  The IPython Development Team
+             #
-             #  Distributed under the terms of the BSD License.  The full license is in
-             #  the file COPYING, distributed as part of this software.
-             #-----------------------------------------------------------------------------
-             #-----------------------------------------------------------------------------
-             # Imports
-             #-----------------------------------------------------------------------------
              import errno
              import logging
              import os
              import re
              import signal
              from subprocess import check_call, CalledProcessError, PIPE
              import zmq
-             from zmq.eventloop import ioloop
-             from IPython.config.application import Application, boolean_flag, catch_config_error
+             from IPython.config.application import catch_config_error
              from IPython.config.loader import Config
              from IPython.core.application import BaseIPythonApplication
              from IPython.core.profiledir import ProfileDir
              from IPython.utils.daemonize import daemonize
              from IPython.utils.importstring import import_item
              from IPython.utils.py3compat import string_types
              from IPython.utils.sysinfo import num_cpus
              from IPython.utils.traitlets import (Integer, Unicode, Bool, CFloat, Dict, List, Any,
                                                      DottedObjectName)
              from IPython.parallel.apps.baseapp import (
                  BaseParallelApplication,
                  PIDFileError,
                  base_flags, base_aliases
              )
              #-----------------------------------------------------------------------------
              # Module level variables
              #-----------------------------------------------------------------------------
              # Description and usage-example strings for the ipcluster command line.
              # _stop_examples and _engines_examples are attached below as the `examples`
              # of IPClusterStop and IPClusterEngines respectively.
              _description = """Start an IPython cluster for parallel computing.
              An IPython cluster consists of 1 controller and 1 or more engines.
              This command automates the startup of these processes using a wide range of
              startup methods (SSH, local processes, PBS, mpiexec, SGE, LSF, HTCondor,
              Windows HPC Server 2008). To start a cluster with 4 engines on your
              local host simply do 'ipcluster start --n=4'. For more complex usage
              you will typically do 'ipython profile create mycluster --parallel', then edit
              configuration files, followed by 'ipcluster start --profile=mycluster --n=4'.
              """
              _main_examples = """
              ipcluster start --n=4 # start a 4 node cluster on localhost
              ipcluster start -h    # show the help string for the start subcmd
              ipcluster stop -h     # show the help string for the stop subcmd
              ipcluster engines -h  # show the help string for the engines subcmd
              """
              _start_examples = """
              ipython profile create mycluster --parallel # create mycluster profile
              ipcluster start --profile=mycluster --n=4   # start mycluster with 4 nodes
              """
              _stop_examples = """
              ipcluster stop --profile=mycluster  # stop a running cluster by profile name
              """
              _engines_examples = """
              ipcluster engines --profile=mycluster --n=4  # start 4 engines only
              """
              # Exit codes for ipcluster
              # This will be the exit code if the ipcluster appears to be running because
              # a .pid file exists.
              ALREADY_STARTED = 10
              # This will be the exit code if ipcluster stop is run, but the .pid file
              # cannot be read or the pid it records is not running.
              ALREADY_STOPPED = 11
              # This will be the exit code if ipcluster engines is run, but there is no .pid
              # file to be found.
              NO_CLUSTER = 12
              #-----------------------------------------------------------------------------
              # Utilities
              #-----------------------------------------------------------------------------
              def find_launcher_class(clsname, kind):
                  """Import and return the launcher class named by ``clsname``.

                  Parameters
                  ----------
                  clsname : str
                      The full name of the launcher class, either with or without the
                      module path, or an abbreviation (MPI, SSH, SGE, PBS, LSF, HTCondor
                      WindowsHPC).
                  kind : str
                      Either 'EngineSet' or 'Controller'.

                  Returns
                  -------
                  The object that ``import_item`` returns for the resolved dotted name.
                  """
                  if '.' in clsname:
                      # already a dotted path: import it exactly as given
                      return import_item(clsname)
                  # no module part, presume it's a name in apps.launcher
                  if kind and kind not in clsname:
                      # just a prefix such as 'PBS' or 'MPI': expand it to the full
                      # '<prefix><kind>Launcher' class name
                      clsname = clsname + kind + 'Launcher'
                  return import_item('IPython.parallel.apps.launcher.' + clsname)
              #-----------------------------------------------------------------------------
              # Main application
              #-----------------------------------------------------------------------------
              # Long help texts for the ipcluster subcommands. stop_help and engines_help
              # are used below as the `description` of IPClusterStop and IPClusterEngines;
              # start_help is presumably used by the start application, which is not
              # defined in this part of the file.
              start_help = """Start an IPython cluster for parallel computing
              Start an ipython cluster by its profile name or cluster
              directory. Cluster directories contain configuration, log and
              security related files and are named using the convention
              'profile_<name>' and should be creating using the 'start'
              subcommand of 'ipcluster'. If your cluster directory is in
              the cwd or the ipython directory, you can simply refer to it
              using its profile name, 'ipcluster start --n=4 --profile=<profile>`,
              otherwise use the 'profile-dir' option.
              """
              stop_help = """Stop a running IPython cluster
              Stop a running ipython cluster by its profile name or cluster
              directory. Cluster directories are named using the convention
              'profile_<name>'. If your cluster directory is in
              the cwd or the ipython directory, you can simply refer to it
              using its profile name, 'ipcluster stop --profile=<profile>`, otherwise
              use the '--profile-dir' option.
              """
              engines_help = """Start engines connected to an existing IPython cluster
              Start one or more engines to connect to an existing Cluster
              by profile name or cluster directory.
              Cluster directories contain configuration, log and
              security related files and are named using the convention
              'profile_<name>' and should be creating using the 'start'
              subcommand of 'ipcluster'. If your cluster directory is in
              the cwd or the ipython directory, you can simply refer to it
              using its profile name, 'ipcluster engines --n=4 --profile=<profile>`,
              otherwise use the 'profile-dir' option.
              """
              # Aliases for IPClusterStop: a `signal` alias plus the shared base aliases.
              # The base aliases are merged in last, so a base alias named `signal` would
              # replace the entry defined here.
              stop_aliases = dict(
                  signal='IPClusterStop.signal',
              )
              stop_aliases.update(base_aliases)
              class IPClusterStop(BaseParallelApplication):
                  """Application that stops the cluster whose pid is recorded in the pid file."""
                  name = u'ipcluster'
                  description = stop_help
                  examples = _stop_examples

                  signal = Integer(signal.SIGINT, config=True,
                      help="signal to use for stopping processes.")

                  aliases = Dict(stop_aliases)

                  def start(self):
                      """Start the app for the stop subcommand.

                      Reads the cluster pid from the pid file and stops that process:
                      with ``os.kill`` and the configured signal on posix, with
                      ``taskkill`` (whole process tree, forced) on Windows. If the pid
                      file cannot be read, or the pid is not running, the pid file is
                      removed and the app exits with ``ALREADY_STOPPED``.
                      """
                      try:
                          pid = self.get_pid_from_file()
                      except PIDFileError:
                          self.log.critical(
                              'Could not read pid file, cluster is probably not running.'
                          )
                          # Here I exit with an unusual exit status that other processes
                          # can watch for to learn how I exited.
                          self.remove_pid_file()
                          self.exit(ALREADY_STOPPED)

                      if not self.check_pid(pid):
                          self.log.critical(
                              'Cluster [pid=%r] is not running.' % pid
                          )
                          self.remove_pid_file()
                          # Here I exit with an unusual exit status that other processes
                          # can watch for to learn how I exited.
                          self.exit(ALREADY_STOPPED)
                      elif os.name=='posix':
                          sig = self.signal
                          self.log.info(
                              "Stopping cluster [pid=%r] with [signal=%r]" % (pid, sig)
                          )
                          try:
                              os.kill(pid, sig)
                          except OSError:
                              self.log.error("Stopping cluster failed, assuming already dead.",
                                  exc_info=True)
                              self.remove_pid_file()
                      elif os.name=='nt':
                          try:
                              # kill the whole tree; the output is discarded via os.devnull
                              # rather than PIPE, because check_call never reads from
                              # pipes and an unread pipe can block the child
                              with open(os.devnull, 'w') as devnull:
                                  check_call(['taskkill', '-pid', str(pid), '-t', '-f'],
                                             stdout=devnull, stderr=devnull)
                          except (CalledProcessError, OSError):
                              self.log.error("Stopping cluster failed, assuming already dead.",
                                  exc_info=True)
                          self.remove_pid_file()
              # Aliases for IPClusterEngines: the shared base aliases plus the
              # engine-specific options.
              engine_aliases = dict(base_aliases)
              engine_aliases.update({
                  'n': 'IPClusterEngines.n',
                  'engines': 'IPClusterEngines.engine_launcher_class',
                  'daemonize': 'IPClusterEngines.daemonize',
              })
              # Flags for IPClusterEngines: the shared base flags plus `daemonize`.
              engine_flags = dict(base_flags)
              engine_flags['daemonize'] = (
                  {'IPClusterEngines' : {'daemonize' : True}},
                  """run the cluster into the background (not available on Windows)""",
              )
              class IPClusterEngines(BaseParallelApplication):
                  name = u'ipcluster'
                  description = engines_help
                  examples = _engines_examples
                  usage = None
                  default_log_level = logging.INFO
                  classes = List()
                  def _classes_default(self):
                      """Default for ``classes``: ProfileDir plus every EngineSet launcher."""
                      from IPython.parallel.apps import launcher
                      engine_set_launchers = []
                      for candidate in launcher.all_launchers:
                          if 'EngineSet' in candidate.__name__:
                              engine_set_launchers.append(candidate)
                      return [ProfileDir] + engine_set_launchers
                  n = Integer(num_cpus(), config=True,
                      help="""The number of engines to start. The default is to use one for each
                      CPU on your machine""")
                  engine_launcher = Any(config=True, help="Deprecated, use engine_launcher_class")
                  def _engine_launcher_changed(self, name, old, new):
                      """React to a change of the deprecated ``engine_launcher`` trait.

                      When the new value is a string (old-style configuration), warn that
                      the trait is deprecated and forward the value to
                      ``engine_launcher_class``. Any other value is left untouched.
                      """
                      if isinstance(new, string_types):
                          # Logger.warn is a deprecated alias; Logger.warning is the
                          # supported spelling. The emitted message is unchanged.
                          self.log.warning("WARNING: %s.engine_launcher is deprecated as of 0.12,"
                                  " use engine_launcher_class", self.__class__.__name__)
                          self.engine_launcher_class = new
                  engine_launcher_class = DottedObjectName('LocalEngineSetLauncher',
                      config=True,
                      help="""The class for launching a set of Engines. Change this value
                      to use various batch systems to launch your engines, such as PBS,SGE,MPI,etc.
                      Each launcher class has its own set of configuration options, for making sure
                      it will work in your environment.
                      You can also write your own launcher, and specify it's absolute import path,
                      as in 'mymodule.launcher.FTLEnginesLauncher`.
                      IPython's bundled examples include:
                          Local : start engines locally as subprocesses [default]
                          MPI : use mpiexec to launch engines in an MPI environment
                          PBS : use PBS (qsub) to submit engines to a batch queue
                          SGE : use SGE (qsub) to submit engines to a batch queue
                          LSF : use LSF (bsub) to submit engines to a batch queue
                          SSH : use SSH to start the controller
                                      Note that SSH does *not* move the connection files
                                      around, so you will likely have to do this manually
                                      unless the machines are on a shared file system.
                          HTCondor : use HTCondor to submit engines to a batch queue
                          WindowsHPC : use Windows HPC
                      If you are using one of IPython's builtin launchers, you can specify just the
                      prefix, e.g:
                          c.IPClusterEngines.engine_launcher_class = 'SSH'
                      or:
                          ipcluster start --engines=MPI
                      """
                      )
                  daemonize = Bool(False, config=True,
                      help="""Daemonize the ipcluster program. This implies --log-to-file.
                      Not available on Windows.
                      """)
                  def _daemonize_changed(self, name, old, new):
                      """Turn on ``log_to_file`` whenever ``daemonize`` is set to a true value."""
                      if not new:
                          return
                      self.log_to_file = True
                  early_shutdown = Integer(30, config=True, help="The timeout (in seconds)")
                  _stopping = False
                  aliases = Dict(engine_aliases)
                  flags = Dict(engine_flags)
                  @catch_config_error
                  def initialize(self, argv=None):
                      """Run the parent initialization, then install the SIGINT handler
                      and build the launchers.
                      """
                      super(IPClusterEngines, self).initialize(argv)
                      self.init_signal()
                      self.init_launchers()
                  def init_launchers(self):
                      """Build the engine-set launcher named by ``engine_launcher_class``."""
                      self.engine_launcher = self.build_launcher(self.engine_launcher_class, 'EngineSet')
                  def init_signal(self):
                      """Install ``sigint_handler`` as the handler for SIGINT."""
                      # Setup signals
                      signal.signal(signal.SIGINT, self.sigint_handler)
                  def build_launcher(self, clsname, kind=None):
                      """Resolve *clsname* to a launcher class and return an instance of it.

                      If the class cannot be found, a fatal message is logged and the
                      application exits with status 1.
                      """
                      try:
                          launcher_class = find_launcher_class(clsname, kind)
                      except (ImportError, KeyError):
                          self.log.fatal("Could not import launcher class: %r"%clsname)
                          self.exit(1)
                      return launcher_class(
                          work_dir=u'.',
                          parent=self,
                          log=self.log,
                          profile_dir=self.profile_dir.location,
                          cluster_id=self.cluster_id,
                      )
                  def engines_started_ok(self):
                      """Record that the engines survived the early-shutdown window."""
                      self.log.info("Engines appear to have started successfully")
                      # A zero value makes engines_stopped_early skip its error handling.
                      self.early_shutdown = 0
                  def start_engines(self):
                      """Start the engine set and arm the early-shutdown detection.

                      Any exception raised by the launcher is logged and re-raised.
                      """
                      # Some EngineSetLaunchers ignore `n` and use their own engine count, such as SSH:
                      n = getattr(self.engine_launcher, 'engine_count', self.n)
                      self.log.info("Starting %s Engines with %s", n, self.engine_launcher_class)
                      try:
                          # NOTE: the launcher is always handed self.n; the `n` computed
                          # above is only used in the log message.
                          self.engine_launcher.start(self.n)
                      except:
                          self.log.exception("Engine start failed")
                          raise
                      self.engine_launcher.on_stop(self.engines_stopped_early)
                      if self.early_shutdown:
                          # Schedule engines_started_ok to run once `early_shutdown`
                          # seconds have passed.
-                         ioloop.DelayedCallback(self.engines_started_ok, self.early_shutdown*1000, self.loop).start()
+                         self.loop.add_timeout(self.loop.time() + self.early_shutdown, self.engines_started_ok)
                  def engines_stopped_early(self, r):
                      """Stop callback registered on the engine launcher.

                      If the engines stop while ``early_shutdown`` is still non-zero and no
                      shutdown is already in progress, log an error describing the likely
                      cause and stop the launchers. In every case, finish by delegating to
                      ``engines_stopped`` and returning its result.
                      """
                      if self.early_shutdown and not self._stopping:
                          self.log.error("""
                          Engines shutdown early, they probably failed to connect.
                          Check the engine log files for output.
                          If your controller and engines are not on the same machine, you probably
                          have to instruct the controller to listen on an interface other than localhost.
                          You can set this by adding "--ip='*'" to your ControllerLauncher.controller_args.
                          Be sure to read our security docs before instructing your controller to listen on
                          a public interface.
                          """)
                          self.stop_launchers()
                      return self.engines_stopped(r)
                  def engines_stopped(self, r):
                      """Stop the event loop; the argument `r` is not used."""
                      return self.loop.stop()
                  def stop_engines(self):
                      """Stop the engine launcher if it is running.

                      Returns whatever the launcher's ``stop()`` returns, or None when the
                      launcher is not running.
                      """
                      if not self.engine_launcher.running:
                          return None
                      self.log.info("Stopping Engines...")
                      return self.engine_launcher.stop()
                  def stop_launchers(self, r=None):
                      """Stop the engines, then stop the event loop after a short delay.

                      The ``_stopping`` flag makes repeated calls a no-op. The argument
                      `r` is accepted but not used here.
                      """
                      if not self._stopping:
                          self._stopping = True
                          self.log.error("IPython cluster: stopping")
                          self.stop_engines()
                          # Wait a few seconds to let things shut down.
-                         dc = ioloop.DelayedCallback(self.loop.stop, 3000, self.loop)
-                         dc.start()
+                         self.loop.add_timeout(self.loop.time() + 3, self.loop.stop)
                  def sigint_handler(self, signum, frame):
                      """Signal handler for SIGINT: stop the launchers.

                      `signum` and `frame` are the standard signal-handler arguments and
                      are not used.
                      """
                      self.log.debug("SIGINT received, stopping launchers...")
                      self.stop_launchers()
                  def start_logging(self):
                      """Delete old controller/engine log files when ``clean_logs`` is set."""
                      if not self.clean_logs:
                          return
                      # Remove old log files of the controller and engine
                      log_dir = self.profile_dir.log_dir
                      stale_log = re.compile(r'ip(engine|controller)-.+\.(log|err|out)')
                      for filename in os.listdir(log_dir):
                          if stale_log.match(filename):
                              os.remove(os.path.join(log_dir, filename))
                  def start(self):
                      """Start the app for the engines subcommand.

                      Optionally daemonizes (POSIX only), queues ``start_engines`` on the
                      event loop and then runs the loop until it is stopped.
                      """
                      self.log.info("IPython cluster: started")
                      # First see if the cluster is already running
                      # Now log and daemonize
                      self.log.info(
                          'Starting engines with [daemon=%r]' % self.daemonize
                      )
                      # TODO: Get daemonize working on Windows or as a Windows Server.
                      if self.daemonize:
                          if os.name=='posix':
                              daemonize()
                      # Queue start_engines so that it runs once the loop is started below.
-                     dc = ioloop.DelayedCallback(self.start_engines, 0, self.loop)
-                     dc.start()
+                     self.loop.add_callback(self.start_engines)
                      # Now write the new pid file AFTER our new forked pid is active.
                      # self.write_pid_file()
                      try:
                          self.loop.start()
                      except KeyboardInterrupt:
                          pass
                      except zmq.ZMQError as e:
                          # An interrupted system call (EINTR) is ignored; any other
                          # ZMQ error propagates.
                          if e.errno == errno.EINTR:
                              pass
                          else:
                              raise
              # Aliases for the ``start`` subcommand: everything the ``engines``
              # subcommand accepts, plus the controller-related options.
              start_aliases = dict(engine_aliases)
              start_aliases.update({
                  'delay': 'IPClusterStart.delay',
                  'controller': 'IPClusterStart.controller_launcher_class',
                  'clean-logs': 'IPClusterStart.clean_logs',
              })
              class IPClusterStart(IPClusterEngines):
                  name = u'ipcluster'
                  description = start_help
                  examples = _start_examples
                  default_log_level = logging.INFO
                  auto_create = Bool(True, config=True,
                      help="whether to create the profile_dir if it doesn't exist")
                  classes = List()
                  def _classes_default(self):
                      """Default for ``classes``: ProfileDir, IPClusterEngines and all launchers."""
                      from IPython.parallel.apps import launcher
                      default_classes = [ProfileDir, IPClusterEngines]
                      default_classes.extend(launcher.all_launchers)
                      return default_classes
                  clean_logs = Bool(True, config=True,
                      help="whether to cleanup old logs before starting")
                  delay = CFloat(1., config=True,
                      help="delay (in s) between starting the controller and the engines")
                  controller_launcher = Any(config=True, help="Deprecated, use controller_launcher_class")
                  def _controller_launcher_changed(self, name, old, new):
                      """React to a change of the deprecated ``controller_launcher`` trait.

                      When the new value is a string (old 0.11-style config), warn that the
                      trait is deprecated and forward the value to
                      ``controller_launcher_class``. Any other value is left untouched.
                      """
                      if isinstance(new, string_types):
                          # old 0.11-style config
                          # Logger.warn is a deprecated alias; Logger.warning is the
                          # supported spelling. The emitted message is unchanged.
                          self.log.warning("WARNING: %s.controller_launcher is deprecated as of 0.12,"
                                  " use controller_launcher_class", self.__class__.__name__)
                          self.controller_launcher_class = new
                  controller_launcher_class = DottedObjectName('LocalControllerLauncher',
                      config=True,
                      help="""The class for launching a Controller. Change this value if you want
                      your controller to also be launched by a batch system, such as PBS,SGE,MPI,etc.
                      Each launcher class has its own set of configuration options, for making sure
                      it will work in your environment.
                      Note that using a batch launcher for the controller *does not* put it
                      in the same batch job as the engines, so they will still start separately.
                      IPython's bundled examples include:
                          Local : start engines locally as subprocesses
                          MPI : use mpiexec to launch the controller in an MPI universe
                          PBS : use PBS (qsub) to submit the controller to a batch queue
                          SGE : use SGE (qsub) to submit the controller to a batch queue
                          LSF : use LSF (bsub) to submit the controller to a batch queue
                          HTCondor : use HTCondor to submit the controller to a batch queue
                          SSH : use SSH to start the controller
                          WindowsHPC : use Windows HPC
                      If you are using one of IPython's builtin launchers, you can specify just the
                      prefix, e.g:
                          c.IPClusterStart.controller_launcher_class = 'SSH'
                      or:
                          ipcluster start --controller=MPI
                      """
                      )
                  reset = Bool(False, config=True,
                      help="Whether to reset config files as part of '--create'."
                      )
                  # flags = Dict(flags)
                  aliases = Dict(start_aliases)
                  def init_launchers(self):
                      """Build both the controller launcher and the engine-set launcher."""
                      self.controller_launcher = self.build_launcher(self.controller_launcher_class, 'Controller')
                      self.engine_launcher = self.build_launcher(self.engine_launcher_class, 'EngineSet')
                  def engines_stopped(self, r):
                      """Do nothing when the engines stop.

                      This overrides the parent implementation so that an engine shutdown
                      on its own does not stop everything.
                      """
                      return None
                  def start_controller(self):
                      """Start the controller launcher.

                      ``stop_launchers`` is registered as the controller's stop callback
                      before starting. A failure to start is logged and re-raised.
                      """
                      self.log.info("Starting Controller with %s", self.controller_launcher_class)
                      self.controller_launcher.on_stop(self.stop_launchers)
                      try:
                          self.controller_launcher.start()
                      except:
                          self.log.exception("Controller start failed")
                          raise
                  def stop_controller(self):
                      """Stop the controller launcher if there is one and it is running.

                      Returns the result of the launcher's ``stop()``, or None otherwise.
                      """
                      if not (self.controller_launcher and self.controller_launcher.running):
                          return None
                      return self.controller_launcher.stop()
                  def stop_launchers(self, r=None):
                      """Stop the controller, then run the parent's launcher shutdown.

                      Does nothing when a shutdown is already in progress. The argument
                      `r` is accepted but not used.
                      """
                      if not self._stopping:
                          self.stop_controller()
                          super(IPClusterStart, self).stop_launchers()
                  def start(self):
                      """Start the app for the start subcommand.

                      Exits with ALREADY_STARTED if the pid file points at a running
                      cluster. Otherwise optionally daemonizes (POSIX only), queues the
                      controller and engine startup on the event loop, writes the pid file
                      and runs the loop. The pid file is removed when the loop exits.
                      """
                      # First see if the cluster is already running
                      try:
                          pid = self.get_pid_from_file()
                      except PIDFileError:
                          pass
                      else:
                          if self.check_pid(pid):
                              self.log.critical(
                                  'Cluster is already running with [pid=%s]. '
                                  'use "ipcluster stop" to stop the cluster.' % pid
                              )
                              # Here I exit with an unusual exit status that other processes
                              # can watch for to learn how I exited.
                              self.exit(ALREADY_STARTED)
                          else:
                              # The pid in the file is not running: remove the stale file.
                              self.remove_pid_file()
                      # Now log and daemonize
                      self.log.info(
                          'Starting ipcluster with [daemon=%r]' % self.daemonize
                      )
                      # TODO: Get daemonize working on Windows or as a Windows Server.
                      if self.daemonize:
                          if os.name=='posix':
                              daemonize()
                      # Start the controller first; the engines follow `delay` seconds later.
-                     dc = ioloop.DelayedCallback(self.start_controller, 0, self.loop)
-                     dc.start()
-                     dc = ioloop.DelayedCallback(self.start_engines, 1000*self.delay, self.loop)
-                     dc.start()
+                     def start():
+                         self.start_controller()
+                         self.loop.add_timeout(self.loop.time() + self.delay, self.start_engines)
+                     self.loop.add_callback(start)
                      # Now write the new pid file AFTER our new forked pid is active.
                      self.write_pid_file()
                      try:
                          self.loop.start()
                      except KeyboardInterrupt:
                          pass
                      except zmq.ZMQError as e:
                          # An interrupted system call (EINTR) is ignored; any other
                          # ZMQ error propagates.
                          if e.errno == errno.EINTR:
                              pass
                          else:
                              raise
                      finally:
                          self.remove_pid_file()
              base='IPython.parallel.apps.ipclusterapp.IPCluster'
              class IPClusterApp(BaseIPythonApplication):
                  """Top-level ``ipcluster`` application that dispatches to a subcommand."""
                  name = u'ipcluster'
                  description = _description
                  examples = _main_examples
                  # Maps each subcommand name to (import string of its app class, help text).
                  subcommands = {
                              'start' : (base+'Start', start_help),
                              'stop' : (base+'Stop', stop_help),
                              'engines' : (base+'Engines', engines_help),
                  }
                  # no aliases or flags for parent App
                  aliases = Dict()
                  flags = Dict()
                  def start(self):
                      """Run the selected subcommand.

                      When no subcommand was given, print the available subcommands, the
                      description and the subcommand help, then exit with status 1.
                      """
                      if self.subapp is None:
                          # list() keeps the message a plain list on Python 3, where a
                          # dict.keys() view would be printed as "dict_keys([...])".
                          print("No subcommand specified. Must specify one of: %s"%(list(self.subcommands)))
                          print()
                          self.print_description()
                          self.print_subcommands()
                          self.exit(1)
                      else:
                          return self.subapp.start()
              launch_new_instance = IPClusterApp.launch_instance
              if __name__ == '__main__':
                  launch_new_instance()

IPython/parallel/apps/launcher.py

0 +1 -2

              # encoding: utf-8
              """Facilities for launching IPython processes asynchronously."""
              # Copyright (c) IPython Development Team.
              # Distributed under the terms of the Modified BSD License.
              import copy
              import logging
              import os
              import pipes
              import stat
              import sys
              import time
              # signal imports, handling various platforms, versions
              from signal import SIGINT, SIGTERM
              try:
                  from signal import SIGKILL
              except ImportError:
                  # Windows
                  SIGKILL=SIGTERM
              try:
                  # Windows >= 2.7, 3.2
                  from signal import CTRL_C_EVENT as SIGINT
              except ImportError:
                  pass
              from subprocess import Popen, PIPE, STDOUT
              try:
                  from subprocess import check_output
              except ImportError:
                  # pre-2.7, define check_output with Popen
                  def check_output(*args, **kwargs):
                      """Minimal stand-in for :func:`subprocess.check_output`.

                      Runs the command with stdout captured through a pipe and returns the
                      captured output. The exit status of the command is not checked.
                      """
                      kwargs['stdout'] = PIPE
                      captured, _ = Popen(*args, **kwargs).communicate()
                      return captured
              from zmq.eventloop import ioloop
              from IPython.config.application import Application
              from IPython.config.configurable import LoggingConfigurable
              from IPython.utils.text import EvalFormatter
              from IPython.utils.traitlets import (
                  Any, Integer, CFloat, List, Unicode, Dict, Instance, HasTraits, CRegExp
              )
              from IPython.utils.encoding import DEFAULT_ENCODING
              from IPython.utils.path import get_home_dir, ensure_dir_exists
              from IPython.utils.process import find_cmd, FindCmdError
              from IPython.utils.py3compat import iteritems, itervalues
              from .win32support import forward_read_events
              from .winhpcjob import IPControllerTask, IPEngineTask, IPControllerJob, IPEngineSetJob
              WINDOWS = (os.name == 'nt')
              #-----------------------------------------------------------------------------
              # Paths to the kernel apps
              #-----------------------------------------------------------------------------
              # Every command is ``<current interpreter> -m <entry point>``.
              _python_m = [sys.executable, "-m"]
              ipcluster_cmd_argv = _python_m + ["IPython.parallel.cluster"]
              ipengine_cmd_argv = _python_m + ["IPython.parallel.engine"]
              ipcontroller_cmd_argv = _python_m + ["IPython.parallel.controller"]
              if WINDOWS and sys.version_info[0] < 3:
                  # `python -m package` doesn't work on Windows Python 2,
                  # but `python -m module` does.
                  ipengine_cmd_argv = _python_m + ["IPython.parallel.apps.ipengineapp"]
                  ipcontroller_cmd_argv = _python_m + ["IPython.parallel.apps.ipcontrollerapp"]
              #-----------------------------------------------------------------------------
              # Base launchers and errors
              #-----------------------------------------------------------------------------
              class LauncherError(Exception):
                  """Base class for the errors raised by launchers in this module."""
              class ProcessStateError(LauncherError):
                  """Raised when an operation is not valid in the launcher's current state."""
              class UnknownStatus(LauncherError):
                  """Launcher error; presumably signals an unrecognized process/job status (confirm at the raise sites)."""
              class BaseLauncher(LoggingConfigurable):
                  """An abstraction for starting, stopping and signaling a process.

                  A launcher moves through three states, stored in ``self.state``:
                  'before', 'running' and 'after'.
                  """
                  # In all of the launchers, the work_dir is where child processes will be
                  # run. This will usually be the profile_dir, but may not be. any work_dir
                  # passed into the __init__ method will override the config value.
                  # This should not be used to set the work_dir for the actual engine
                  # and controller. Instead, use their own config files or the
                  # controller_args, engine_args attributes of the launchers to add
                  # the work_dir option.
                  work_dir = Unicode(u'.')
                  loop = Instance('zmq.eventloop.ioloop.IOLoop')
                  start_data = Any()
                  stop_data = Any()
                  def _loop_default(self):
                      """Default for ``loop``: the global IOLoop instance."""
                      return ioloop.IOLoop.instance()
                  def __init__(self, work_dir=u'.', config=None, **kwargs):
                      """Create the launcher in the 'before' state with no callbacks."""
                      super(BaseLauncher, self).__init__(work_dir=work_dir, config=config, **kwargs)
                      self.state = 'before' # can be before, running, after
                      self.stop_callbacks = []
                      self.start_data = None
                      self.stop_data = None
                  @property
                  def args(self):
                      """A list of cmd and args that will be used to start the process.
                      This is what is passed to :func:`spawnProcess` and the first element
                      will be the process name.
                      """
                      return self.find_args()
                  def find_args(self):
                      """The ``.args`` property calls this to find the args list.
                      Subclasses should implement this to construct the cmd and args.
                      """
                      raise NotImplementedError('find_args must be implemented in a subclass')
                  @property
                  def arg_str(self):
                      """The string form of the program arguments."""
                      return ' '.join(self.args)
                  @property
                  def running(self):
                      """True while the launcher is in the 'running' state."""
                      return self.state == 'running'
                  def start(self):
                      """Start the process."""
                      raise NotImplementedError('start must be implemented in a subclass')
                  def stop(self):
                      """Stop the process and notify observers of stopping.
                      This method will return None immediately.
                      To observe the actual process stopping, see :meth:`on_stop`.
                      """
                      raise NotImplementedError('stop must be implemented in a subclass')
                  def on_stop(self, f):
                      """Register a callback to be called with this Launcher's stop_data
                      when the process actually finishes.

                      If the process has already finished (state 'after'), `f` is called
                      immediately and its return value is returned.
                      """
                      if self.state=='after':
                          return f(self.stop_data)
                      else:
                          self.stop_callbacks.append(f)
                  def notify_start(self, data):
                      """Call this to trigger startup actions.
                      This logs the process startup and sets the state to 'running'.  It is
                      a pass-through so it can be used as a callback.
                      """
                      self.log.debug('Process %r started: %r', self.args[0], data)
                      self.start_data = data
                      self.state = 'running'
                      return data
                  def notify_stop(self, data):
                      """Call this to trigger process stop actions.
                      This logs the process stopping and sets the state to 'after'. Call
                      this to trigger callbacks registered via :meth:`on_stop`.

                      Callbacks are removed as they are called, most recently registered
                      first. Returns `data` unchanged.
                      """
                      self.log.debug('Process %r stopped: %r', self.args[0], data)
                      self.stop_data = data
                      self.state = 'after'
                      # Drain the callback list; the state is already 'after', so
                      # on_stop() no longer appends to it.
                      while self.stop_callbacks:
                          callback = self.stop_callbacks.pop()
                          callback(data)
                      return data
                  def signal(self, sig):
                      """Signal the process.
                      Parameters
                      ----------
                      sig : str or int
                          'KILL', 'INT', etc., or any signal number
                      """
                      raise NotImplementedError('signal must be implemented in a subclass')
              class ClusterAppMixin(HasTraits):
                  """MixIn for cluster args as traits"""
                  profile_dir = Unicode('')
                  cluster_id = Unicode('')
                  @property
                  def cluster_args(self):
                      """Command-line arguments carrying the profile directory and cluster id."""
                      arguments = ['--profile-dir', self.profile_dir]
                      arguments.extend(['--cluster-id', self.cluster_id])
                      return arguments
              class ControllerMixin(ClusterAppMixin):
                  """Mixin holding the command and arguments used to launch ipcontroller."""
                  controller_cmd = List(ipcontroller_cmd_argv, config=True,
                      help="""Popen command to launch ipcontroller.""")
                  # Command line arguments to ipcontroller.
                  controller_args = List(['--log-to-file','--log-level=%i' % logging.INFO], config=True,
                      help="""command-line args to pass to ipcontroller""")
              class EngineMixin(ClusterAppMixin):
                  """Mixin holding the command and arguments used to launch ipengine."""
                  engine_cmd = List(ipengine_cmd_argv, config=True,
                      help="""command to launch the Engine.""")
                  # Command line arguments for ipengine.
                  engine_args = List(['--log-to-file','--log-level=%i' % logging.INFO], config=True,
                      help="command-line arguments to pass to ipengine"
                  )
              #-----------------------------------------------------------------------------
              # Local process launchers
              #-----------------------------------------------------------------------------
              class LocalProcessLauncher(BaseLauncher):
                  """Start and stop an external process in an asynchronous manner.
                  This will launch the external process with a working directory of
                  ``self.work_dir``.
                  """
                  # This is used to to construct self.args, which is passed to
                  # spawnProcess.
                  cmd_and_args = List([])
                  poll_frequency = Integer(100) # in ms
                  def __init__(self, work_dir=u'.', config=None, **kwargs):
                      """Initialize the launcher with no child process and no poller yet."""
                      super(LocalProcessLauncher, self).__init__(
                          work_dir=work_dir, config=config, **kwargs
                      )
                      # Both are assigned in start().
                      self.process = None
                      self.poller = None
                  def find_args(self):
                      """Return ``cmd_and_args`` as the argument list for the process."""
                      return self.cmd_and_args
                  def start(self):
                      """Spawn the child process and hook its output into the event loop.

                      Raises ProcessStateError unless the launcher is in the 'before' state.
                      """
                      self.log.debug("Starting %s: %r", self.__class__.__name__, self.args)
                      if self.state != 'before':
                          s = 'The process was already started and has state: %r' % self.state
                          raise ProcessStateError(s)
                      self.process = Popen(
                          self.args,
                          stdout=PIPE, stderr=PIPE, stdin=PIPE,
                          env=os.environ,
                          cwd=self.work_dir,
                      )
                      if WINDOWS:
                          self.stdout = forward_read_events(self.process.stdout)
                          self.stderr = forward_read_events(self.process.stderr)
                      else:
                          self.stdout = self.process.stdout.fileno()
                          self.stderr = self.process.stderr.fileno()
                      # Watch both output streams for readability.
                      self.loop.add_handler(self.stdout, self.handle_stdout, self.loop.READ)
                      self.loop.add_handler(self.stderr, self.handle_stderr, self.loop.READ)
                      # Check periodically whether the child has exited.
                      self.poller = ioloop.PeriodicCallback(self.poll, self.poll_frequency, self.loop)
                      self.poller.start()
                      self.notify_start(self.process.pid)
                  def stop(self):
                      """Stop the process by calling ``interrupt_then_kill`` with its default delay."""
                      return self.interrupt_then_kill()
                  def signal(self, sig):
                      """Send *sig* to the child process; do nothing unless it is running.

                      On Windows, every signal other than SIGINT is delivered by running
                      ``taskkill`` on the process tree instead.
                      """
                      if self.state != 'running':
                          return
                      if not WINDOWS or sig == SIGINT:
                          self.process.send_signal(sig)
                          return
                      # use Windows tree-kill for better child cleanup
                      check_output(['taskkill', '-pid', str(self.process.pid), '-t', '-f'])
                  def interrupt_then_kill(self, delay=2.0):
                      """Send INT, wait a delay and then send KILL.

                      Parameters
                      ----------
                      delay : float
                          Seconds to wait between sending INT and sending KILL.
                      """
                      try:
                          self.signal(SIGINT)
                      except Exception:
                          # A failed interrupt is only logged; the KILL is still scheduled.
                          self.log.debug("interrupt failed")
                          pass
                      # The scheduled KILL is kept on self.killer.
-                     self.killer  = ioloop.DelayedCallback(lambda : self.signal(SIGKILL), delay*1000, self.loop)
-                     self.killer.start()
+                     self.killer  = self.loop.add_timeout(self.loop.time() + delay, lambda : self.signal(SIGKILL))
                  # callbacks, etc:
                  def handle_stdout(self, fd, events):
                      """Read one line of the child's stdout and log it at debug level.

                      ``fd`` and ``events`` are accepted but not used.  An empty read
                      triggers ``self.poll()`` instead of a log entry.
                      """
                      line = self.stdout.recv() if WINDOWS else self.process.stdout.readline()
                      if not line:
                          # a stopped process will be readable but return empty strings
                          self.poll()
                          return
                      # drop the final character (presumably the newline) before logging
                      self.log.debug(line[:-1])
                  def handle_stderr(self, fd, events):
                      """Read one line of the child's stderr and log it at debug level.

                      ``fd`` and ``events`` are accepted but not used.  An empty read
                      triggers ``self.poll()`` instead of a log entry.
                      """
                      line = self.stderr.recv() if WINDOWS else self.process.stderr.readline()
                      if not line:
                          # a stopped process will be readable but return empty strings
                          self.poll()
                          return
                      # drop the final character (presumably the newline) before logging
                      self.log.debug(line[:-1])
                  def poll(self):
                      """Check whether the child has exited and clean up if it has.

                      Returns the value of ``self.process.poll()``.  When that value is
                      not None the periodic poller is stopped, both stream handlers are
                      removed from the loop, and ``notify_stop`` is called with the exit
                      code and pid.
                      """
                      exit_code = self.process.poll()
                      if exit_code is None:
                          return exit_code
                      self.poller.stop()
                      for handle in (self.stdout, self.stderr):
                          self.loop.remove_handler(handle)
                      stop_info = dict(exit_code=exit_code, pid=self.process.pid)
                      self.notify_stop(stop_info)
                      return exit_code
              class LocalControllerLauncher(LocalProcessLauncher, ControllerMixin):
                  """Launch a controller as a regular external process."""
                  def find_args(self):
                      """Return the controller command followed by cluster and controller args."""
                      argv = self.controller_cmd + self.cluster_args
                      argv = argv + self.controller_args
                      return argv
                  def start(self):
                      """Start the controller by profile_dir."""
                      launch = super(LocalControllerLauncher, self).start
                      return launch()
              class LocalEngineLauncher(LocalProcessLauncher, EngineMixin):
                  """Launch a single engine as a regular external process."""
                  def find_args(self):
                      """Return the engine command followed by cluster and engine args."""
                      argv = self.engine_cmd + self.cluster_args
                      argv = argv + self.engine_args
                      return argv
              class LocalEngineSetLauncher(LocalEngineLauncher):
                  """Launch a set of engines as regular external processes."""
                  delay = CFloat(0.1, config=True,
                      help="""delay (in seconds) between starting each engine after the first.
                      This can help force the engines to get their ids in order, or limit
                      process flood when starting many engines."""
                  )
                  # launcher class
                  launcher_class = LocalEngineLauncher
                  # launchers that have been started and not yet reported as stopped,
                  # keyed by the index assigned in start()
                  launchers = Dict()
                  # stop data reported by each engine, under the same keys as `launchers`
                  stop_data = Dict()
                  def __init__(self, work_dir=u'.', config=None, **kwargs):
                      super(LocalEngineSetLauncher, self).__init__(
                          work_dir=work_dir, config=config, **kwargs
                      )
                      self.stop_data = {}
                  def start(self, n):
                      """Start n engines by profile or profile_dir.

                      Sleeps ``self.delay`` seconds before every engine after the first.
                      Returns the list of values returned by each launcher's ``start()``.
                      """
                      dlist = []
                      for i in range(n):
                          if i > 0:
                              time.sleep(self.delay)
                          el = self.launcher_class(work_dir=self.work_dir, parent=self, log=self.log,
                                                  profile_dir=self.profile_dir, cluster_id=self.cluster_id,
                          )
                          # Copy the engine args over to each engine launcher.
                          el.engine_cmd = copy.deepcopy(self.engine_cmd)
                          el.engine_args = copy.deepcopy(self.engine_args)
                          el.on_stop(self._notice_engine_stopped)
                          d = el.start()
                          self.launchers[i] = el
                          dlist.append(d)
                      self.notify_start(dlist)
                      return dlist
                  def find_args(self):
                      """Return a placeholder argument list; the set runs no command itself."""
                      return ['engine set']
                  def signal(self, sig):
                      """Forward ``sig`` to every launcher and return the list of results."""
                      dlist = []
                      for el in itervalues(self.launchers):
                          d = el.signal(sig)
                          dlist.append(d)
                      return dlist
                  def interrupt_then_kill(self, delay=1.0):
                      """Call ``interrupt_then_kill(delay)`` on every launcher; return the results."""
                      dlist = []
                      for el in itervalues(self.launchers):
                          d = el.interrupt_then_kill(delay)
                          dlist.append(d)
                      return dlist
                  def stop(self):
                      """Stop all engines via ``interrupt_then_kill``."""
                      return self.interrupt_then_kill()
                  def _notice_engine_stopped(self, data):
                      """Record one engine's stop data; notify once every engine has stopped.

                      ``data`` must contain a ``'pid'`` key identifying the stopped engine.
                      """
                      pid = data['pid']
                      for idx, el in iteritems(self.launchers):
                          if el.process.pid == pid:
                              break
                      else:
                          # No launcher owns this pid.  Without this guard the leaked
                          # loop variable would pop an unrelated launcher, or raise
                          # NameError when no launchers are left.
                          self.log.debug("ignoring stop notification for unknown pid %r", pid)
                          return
                      self.launchers.pop(idx)
                      self.stop_data[idx] = data
                      if not self.launchers:
                          self.notify_stop(self.stop_data)
              #-----------------------------------------------------------------------------
              # MPI launchers
              #-----------------------------------------------------------------------------
              class MPILauncher(LocalProcessLauncher):
                  """Launch an external process using mpiexec."""
                  mpi_cmd = List(['mpiexec'], config=True,
                      help="The mpiexec command to use in starting the process."
                  )
                  mpi_args = List([], config=True,
                      help="The command line arguments to pass to mpiexec."
                  )
                  program = List(['date'],
                      help="The program to start via mpiexec.")
                  program_args = List([],
                      help="The command line argument to the program."
                  )
                  # number of instances, passed to mpiexec as `-n`
                  n = Integer(1)
                  def __init__(self, *args, **kwargs):
                      """Merge config from the deprecated MPIExec* names, then initialize.

                      Any truthy config section found under an old ``MPIExec*`` name is
                      merged into the section for the corresponding ``MPI*`` name and a
                      warning is logged.
                      """
                      # deprecation for old MPIExec names:
                      # `config` may be missing or passed explicitly as None; treat
                      # both as "no config" instead of failing on None.get()
                      config = kwargs.get('config') or {}
                      for oldname in ('MPIExecLauncher', 'MPIExecControllerLauncher', 'MPIExecEngineSetLauncher'):
                          deprecated = config.get(oldname)
                          if deprecated:
                              newname = oldname.replace('MPIExec', 'MPI')
                              config[newname].update(deprecated)
                              self.log.warning("WARNING: %s name has been deprecated, use %s", oldname, newname)
                      super(MPILauncher, self).__init__(*args, **kwargs)
                  def find_args(self):
                      """Build self.args using all the fields."""
                      return (self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args +
                              self.program + self.program_args)
                  def start(self, n):
                      """Start n instances of the program using mpiexec."""
                      self.n = n
                      return super(MPILauncher, self).start()
              class MPIControllerLauncher(MPILauncher, ControllerMixin):
                  """Launch a controller using mpiexec."""
                  # alias back to *non-configurable* program[_args] for use in find_args()
                  # this way all Controller/EngineSetLaunchers have the same form, rather
                  # than *some* having `program_args` and others `controller_args`
                  @property
                  def program(self):
                      """The program handed to mpiexec: the controller command."""
                      return self.controller_cmd
                  @property
                  def program_args(self):
                      """Cluster args followed by the controller args."""
                      combined = self.cluster_args + self.controller_args
                      return combined
                  def start(self):
                      """Start the controller by profile_dir."""
                      # a controller is always launched as a single instance
                      launch = super(MPIControllerLauncher, self).start
                      return launch(1)
              class MPIEngineSetLauncher(MPILauncher, EngineMixin):
                  """Launch engines using mpiexec"""
                  # alias back to *non-configurable* program[_args] for use in find_args()
                  # this way all Controller/EngineSetLaunchers have the same form, rather
                  # than *some* having `program_args` and others `engine_args`
                  @property
                  def program(self):
                      """The program handed to mpiexec: the engine command."""
                      return self.engine_cmd
                  @property
                  def program_args(self):
                      """Cluster args followed by the engine args."""
                      combined = self.cluster_args + self.engine_args
                      return combined
                  def start(self, n):
                      """Start n engines by profile or profile_dir."""
                      self.n = n
                      launch = super(MPIEngineSetLauncher, self).start
                      return launch(n)
              # deprecated MPIExec names
              class DeprecatedMPILauncher(object):
                  """Mixin that logs a deprecation warning for the old MPIExec* class names."""
                  def warn(self):
                      """Log that this class name is deprecated in favour of its MPI* name.

                      The new name is this class's own name with 'MPIExec' replaced by
                      'MPI'.  Requires ``self.log`` to be a logger.
                      """
                      oldname = self.__class__.__name__
                      newname = oldname.replace('MPIExec', 'MPI')
                      # Logger.warn is a deprecated alias; warning() is the supported name
                      self.log.warning("WARNING: %s name is deprecated, use %s", oldname, newname)
              class MPIExecLauncher(MPILauncher, DeprecatedMPILauncher):
                  """Deprecated, use MPILauncher"""
                  def __init__(self, *args, **kwargs):
                      """Initialize through the parent classes, then log the deprecation."""
                      parent_init = super(MPIExecLauncher, self).__init__
                      parent_init(*args, **kwargs)
                      # warn only after initialization has completed
                      self.warn()
              class MPIExecControllerLauncher(MPIControllerLauncher, DeprecatedMPILauncher):
                  """Deprecated, use MPIControllerLauncher"""
                  def __init__(self, *args, **kwargs):
                      """Initialize through the parent classes, then log the deprecation."""
                      parent_init = super(MPIExecControllerLauncher, self).__init__
                      parent_init(*args, **kwargs)
                      # warn only after initialization has completed
                      self.warn()
              class MPIExecEngineSetLauncher(MPIEngineSetLauncher, DeprecatedMPILauncher):
                  """Deprecated, use MPIEngineSetLauncher"""
                  def __init__(self, *args, **kwargs):
                      """Initialize through the parent classes, then log the deprecation."""
                      parent_init = super(MPIExecEngineSetLauncher, self).__init__
                      parent_init(*args, **kwargs)
                      # warn only after initialization has completed
                      self.warn()
              #-----------------------------------------------------------------------------
              # SSH launchers
              #-----------------------------------------------------------------------------
              # TODO: Get SSH Launcher back to level of sshx in 0.10.2
              class SSHLauncher(LocalProcessLauncher):
                  """A minimal launcher for ssh.
                  To be useful this will probably have to be extended to use the ``sshx``
                  idea for environment variables.  There could be other things this needs
                  as well.
                  """
                  ssh_cmd = List(['ssh'], config=True,
                      help="command for starting ssh")
                  ssh_args = List(['-tt'], config=True,
                      help="args to pass to ssh")
                  scp_cmd = List(['scp'], config=True,
                      help="command for sending files")
                  program = List(['date'],
                      help="Program to launch via ssh")
                  program_args = List([],
                      help="args to pass to remote program")
                  hostname = Unicode('', config=True,
                      help="hostname on which to launch the program")
                  user = Unicode('', config=True,
                      help="username for ssh")
                  location = Unicode('', config=True,
                      help="user@hostname location for ssh in one setting")
                  to_fetch = List([], config=True,
                      help="List of (remote, local) files to fetch after starting")
                  to_send = List([], config=True,
                      help="List of (local, remote) files to send before starting")
                  def _hostname_changed(self, name, old, new):
                      """Rebuild ``location`` from the new hostname (and ``user``, if set)."""
                      if self.user:
                          self.location = u'%s@%s' % (self.user, new)
                      else:
                          self.location = new
                  def _user_changed(self, name, old, new):
                      """Rebuild ``location`` as ``new@hostname``."""
                      self.location = u'%s@%s' % (new, self.hostname)
                  def find_args(self):
                      """Return the ssh command line, with the remote program shell-quoted."""
                      return self.ssh_cmd + self.ssh_args + [self.location] + \
                             list(map(pipes.quote, self.program + self.program_args))
                  def _send_file(self, local, remote):
                      """send a single file"""
                      full_remote = "%s:%s" % (self.location, remote)
                      # wait up to 10s for the local file to appear
                      for i in range(10):
                          if not os.path.exists(local):
                              self.log.debug("waiting for %s", local)
                              time.sleep(1)
                          else:
                              break
                      remote_dir = os.path.dirname(remote)
                      self.log.info("ensuring remote %s:%s/ exists", self.location, remote_dir)
                      check_output(self.ssh_cmd + self.ssh_args + \
                          [self.location, 'mkdir', '-p', '--', remote_dir]
                      )
                      self.log.info("sending %s to %s", local, full_remote)
                      check_output(self.scp_cmd + [local, full_remote])
                  def send_files(self):
                      """send our files (called before start)"""
                      if not self.to_send:
                          return
                      for local_file, remote_file in self.to_send:
                          self._send_file(local_file, remote_file)
                  def _fetch_file(self, remote, local):
                      """fetch a single file"""
                      full_remote = "%s:%s" % (self.location, remote)
                      self.log.info("fetching %s from %s", local, full_remote)
                      for i in range(10):
                          # wait up to 10s for remote file to exist
                          check = check_output(self.ssh_cmd + self.ssh_args + \
                              [self.location, 'test -e', remote, "&& echo 'yes' || echo 'no'"])
                          check = check.decode(DEFAULT_ENCODING, 'replace').strip()
                          if check == u'no':
                              time.sleep(1)
                          elif check == u'yes':
                              break
                      local_dir = os.path.dirname(local)
                      # the mode must be octal: decimal 775 is 0o1407, not rwxrwxr-x
                      ensure_dir_exists(local_dir, 0o775)
                      check_output(self.scp_cmd + [full_remote, local])
                  def fetch_files(self):
                      """fetch remote files (called after start)"""
                      if not self.to_fetch:
                          return
                      for remote_file, local_file in self.to_fetch:
                          self._fetch_file(remote_file, local_file)
                  def start(self, hostname=None, user=None):
                      """Send files, start the ssh process, then fetch files.

                      ``hostname`` and ``user`` override the configured values when they
                      are not None.
                      """
                      if hostname is not None:
                          self.hostname = hostname
                      if user is not None:
                          self.user = user
                      self.send_files()
                      super(SSHLauncher, self).start()
                      self.fetch_files()
                  def signal(self, sig):
                      """Close the ssh connection; ``sig`` itself is not forwarded."""
                      if self.state == 'running':
                          # send escaped ssh connection-closer
                          # NOTE(review): written as bytes on the assumption that the
                          # child's stdin is a binary pipe -- confirm against the Popen
                          # call in LocalProcessLauncher.start
                          self.process.stdin.write(b'~.')
                          self.process.stdin.flush()
              class SSHClusterLauncher(SSHLauncher, ClusterAppMixin):
                  remote_profile_dir = Unicode('', config=True,
                      help="""The remote profile_dir to use.
                      If not specified, use calling profile, stripping out possible leading homedir.
                      """)
                  def _profile_dir_changed(self, name, old, new):
                      """Derive ``remote_profile_dir`` from the new profile_dir if it is unset."""
                      if self.remote_profile_dir:
                          return
                      # trigger remote_profile_dir_default logic again,
                      # in case it was already triggered before profile_dir was set
                      self.remote_profile_dir = self._strip_home(new)
                  @staticmethod
                  def _strip_home(path):
                      """turns /home/you/.ipython/profile_foo into .ipython/profile_foo"""
                      prefix = get_home_dir()
                      if not prefix.endswith('/'):
                          prefix = prefix + '/'
                      return path[len(prefix):] if path.startswith(prefix) else path
                  def _remote_profile_dir_default(self):
                      """Default: the local profile_dir with the home-directory prefix removed."""
                      local_dir = self.profile_dir
                      return self._strip_home(local_dir)
                  def _cluster_id_changed(self, name, old, new):
                      """Reject any non-empty cluster id."""
                      if not new:
                          return
                      raise ValueError("cluster id not supported by SSH launchers")
                  @property
                  def cluster_args(self):
                      """Command-line arguments selecting the remote profile directory."""
                      return ['--profile-dir', self.remote_profile_dir]
              class SSHControllerLauncher(SSHClusterLauncher, ControllerMixin):
                  # alias back to *non-configurable* program[_args] for use in find_args()
                  # this way all Controller/EngineSetLaunchers have the same form, rather
                  # than *some* having `program_args` and others `controller_args`
                  def _controller_cmd_default(self):
                      """Default controller command for the remote host."""
                      return ['ipcontroller']
                  @property
                  def program(self):
                      """The program run over ssh: the controller command."""
                      return self.controller_cmd
                  @property
                  def program_args(self):
                      """Cluster args followed by the controller args."""
                      combined = self.cluster_args + self.controller_args
                      return combined
                  def _to_fetch_default(self):
                      """Default (remote, local) pairs: both connection files under security/."""
                      pairs = []
                      for cf in ('ipcontroller-client.json', 'ipcontroller-engine.json'):
                          remote_path = os.path.join(self.remote_profile_dir, 'security', cf)
                          local_path = os.path.join(self.profile_dir, 'security', cf)
                          pairs.append((remote_path, local_path))
                      return pairs
              class SSHEngineLauncher(SSHClusterLauncher, EngineMixin):
                  # alias back to *non-configurable* program[_args] for use in find_args()
                  # this way all Controller/EngineSetLaunchers have the same form, rather
                  # than *some* having `program_args` and others `engine_args`
                  def _engine_cmd_default(self):
                      """Default engine command for the remote host."""
                      return ['ipengine']
                  @property
                  def program(self):
                      """The program run over ssh: the engine command."""
                      return self.engine_cmd
                  @property
                  def program_args(self):
                      """Cluster args followed by the engine args."""
                      combined = self.cluster_args + self.engine_args
                      return combined
                  def _to_send_default(self):
                      """Default (local, remote) pairs: both connection files under security/."""
                      pairs = []
                      for cf in ('ipcontroller-client.json', 'ipcontroller-engine.json'):
                          local_path = os.path.join(self.profile_dir, 'security', cf)
                          remote_path = os.path.join(self.remote_profile_dir, 'security', cf)
                          pairs.append((local_path, remote_path))
                      return pairs
              class SSHEngineSetLauncher(LocalEngineSetLauncher):
                  launcher_class = SSHEngineLauncher
                  engines = Dict(config=True,
                      help="""dict of engines to launch.  This is a dict by hostname of ints,
                      corresponding to the number of engines to start on that host.""")
                  def _engine_cmd_default(self):
                      """Default engine command for the remote hosts."""
                      return ['ipengine']
                  @property
                  def engine_count(self):
                      """determine engine count from `engines` dict"""
                      total = 0
                      for spec in itervalues(self.engines):
                          if isinstance(spec, (tuple, list)):
                              # a (count, args) pair: only the count matters here
                              per_host, _unused_args = spec
                          else:
                              per_host = spec
                          total += per_host
                      return total
                  def start(self, n):
                      """Start engines by profile or profile_dir.
                      `n` is ignored, and the `engines` config property is used instead.
                      """
                      started = []
                      for location, spec in iteritems(self.engines):
                          # each value is either a count or a (count, args) pair
                          if isinstance(spec, (tuple, list)):
                              per_host, args = spec
                          else:
                              per_host = spec
                              args = copy.deepcopy(self.engine_args)
                          # keys may be "user@host" or just "host"
                          if '@' in location:
                              user, host = location.split('@', 1)
                          else:
                              user, host = None, location
                          for i in range(per_host):
                              if i > 0:
                                  time.sleep(self.delay)
                              el = self.launcher_class(
                                  work_dir=self.work_dir, parent=self, log=self.log,
                                  profile_dir=self.profile_dir, cluster_id=self.cluster_id,
                              )
                              if i > 0:
                                  # only send files for the first engine on each host
                                  el.to_send = []
                              # Copy the engine args over to each engine launcher.
                              el.engine_cmd = self.engine_cmd
                              el.engine_args = args
                              el.on_stop(self._notice_engine_stopped)
                              result = el.start(user=user, hostname=host)
                              key = "%s/%i" % (host, i)
                              self.launchers[key] = el
                              started.append(result)
                      self.notify_start(started)
                      return started
              class SSHProxyEngineSetLauncher(SSHClusterLauncher):
                  """Launcher for calling
                  `ipcluster engines` on a remote machine.
                  Requires that remote profile is already configured.
                  """
                  # number of engines requested from the remote `ipcluster engines`
                  n = Integer()
                  ipcluster_cmd = List(['ipcluster'], config=True)
                  @property
                  def program(self):
                      """The remote command: ``ipcluster_cmd`` plus the ``engines`` subcommand."""
                      subcommand = ['engines']
                      return self.ipcluster_cmd + subcommand
                  @property
                  def program_args(self):
                      """Engine count and remote profile directory arguments."""
                      count_text = str(self.n)
                      return ['-n', count_text, '--profile-dir', self.remote_profile_dir]
                  def _to_send_default(self):
                      """Default (local, remote) pairs: both connection files under security/."""
                      pairs = []
                      for cf in ('ipcontroller-client.json', 'ipcontroller-engine.json'):
                          local_path = os.path.join(self.profile_dir, 'security', cf)
                          remote_path = os.path.join(self.remote_profile_dir, 'security', cf)
                          pairs.append((local_path, remote_path))
                      return pairs
                  def start(self, n):
                      """Record the engine count ``n`` and start the ssh process."""
                      self.n = n
                      launch = super(SSHProxyEngineSetLauncher, self).start
                      launch()
              #-----------------------------------------------------------------------------
              # Windows HPC Server 2008 scheduler launchers
              #-----------------------------------------------------------------------------
              # This is only used on Windows.
              def find_job_cmd():
                  """Return the job-submission command.

                  On Windows the command is resolved with ``find_cmd('job')``; if that
                  lookup fails, or on any other platform, the bare name 'job' is returned.
                  """
                  fallback = 'job'
                  if not WINDOWS:
                      return fallback
                  try:
                      return find_cmd('job')
                  except (FindCmdError, ImportError):
                      # ImportError will be raised if win32api is not installed
                      return fallback
              class WindowsHPCLauncher(BaseLauncher):
                  """Base launcher that submits and cancels jobs through ``job_cmd``."""
                  job_id_regexp = CRegExp(r'\d+', config=True,
                      help="""A regular expression used to get the job id from the output of the
                      submit_command. """
                      )
                  job_file_name = Unicode(u'ipython_job.xml', config=True,
                      help="The filename of the instantiated job script.")
                  scheduler = Unicode('', config=True,
                      help="The hostname of the scheduler to submit the job to.")
                  job_cmd = Unicode(find_job_cmd(), config=True,
                      help="The command for submitting jobs.")
                  def __init__(self, work_dir=u'.', config=None, **kwargs):
                      super(WindowsHPCLauncher, self).__init__(
                          work_dir=work_dir, config=config, **kwargs
                      )
                  # The full path to the instantiated job script. This gets made dynamically
                  # by combining the work_dir with the job_file_name.  (A former
                  # `job_file = Unicode(u'')` trait was rebound by this property in the
                  # class body and never took effect, so it has been dropped.)
                  @property
                  def job_file(self):
                      return os.path.join(self.work_dir, self.job_file_name)
                  def write_job_file(self, n):
                      """Write the job description file for ``n`` instances (subclass hook)."""
                      raise NotImplementedError("Implement write_job_file in a subclass.")
                  def find_args(self):
                      return [u'job.exe']
                  def parse_job_id(self, output):
                      """Take the output of the submit command and return the job id.

                      The id is also stored on ``self.job_id``.  Raises LauncherError when
                      ``job_id_regexp`` finds no match in ``output``.
                      """
                      m = self.job_id_regexp.search(output)
                      if m is not None:
                          job_id = m.group()
                      else:
                          raise LauncherError("Job id couldn't be determined: %s" % output)
                      self.job_id = job_id
                      self.log.info('Job started with id: %r', job_id)
                      return job_id
                  def start(self, n):
                      """Start n copies of the process using the Win HPC job scheduler."""
                      self.write_job_file(n)
                      args = [
                          'submit',
                          '/jobfile:%s' % self.job_file,
                          '/scheduler:%s' % self.scheduler
                      ]
                      self.log.debug("Starting Win HPC Job: %s", self.job_cmd + ' ' + ' '.join(args))
                      output = check_output([self.job_cmd]+args,
                          env=os.environ,
                          cwd=self.work_dir,
                          stderr=STDOUT
                      )
                      output = output.decode(DEFAULT_ENCODING, 'replace')
                      job_id = self.parse_job_id(output)
                      self.notify_start(job_id)
                      return job_id
                  def stop(self):
                      """Cancel the job and return the cancel command's output.

                      If the cancel command fails, a message saying the job already
                      appears to be stopped is used as the output instead.
                      """
                      args = [
                          'cancel',
                          self.job_id,
                          '/scheduler:%s' % self.scheduler
                      ]
                      self.log.info("Stopping Win HPC Job: %s", self.job_cmd + ' ' + ' '.join(args))
                      try:
                          output = check_output([self.job_cmd]+args,
                              env=os.environ,
                              cwd=self.work_dir,
                              stderr=STDOUT
                          )
                          output = output.decode(DEFAULT_ENCODING, 'replace')
                      except Exception:
                          # best effort, but do not swallow KeyboardInterrupt/SystemExit
                          output = u'The job already appears to be stopped: %r' % self.job_id
                      self.notify_stop(dict(job_id=self.job_id, output=output))  # Pass the output of the kill cmd
                      return output
              class WindowsHPCControllerLauncher(WindowsHPCLauncher, ClusterAppMixin):
                  job_file_name = Unicode(u'ipcontroller_job.xml', config=True,
                      help="WinHPC xml job file.")
                  controller_args = List([], config=False,
                      help="extra args to pass to ipcontroller")
                  def write_job_file(self, n):
                      """Write a job file containing a single controller task; ``n`` is unused."""
                      job = IPControllerJob(parent=self)
                      task = IPControllerTask(parent=self)
                      # The tasks work directory is *not* the actual work directory of
                      # the controller. It is used as the base path for the stdout/stderr
                      # files that the scheduler redirects to.
                      task.work_directory = self.profile_dir
                      # Append the cluster args, then this launcher's controller args.
                      for extra in (self.cluster_args, self.controller_args):
                          task.controller_args.extend(extra)
                      job.add_task(task)
                      self.log.debug("Writing job description file: %s", self.job_file)
                      job.write(self.job_file)
                  @property
                  def job_file(self):
                      """Path of the job file, located inside the profile directory."""
                      return os.path.join(self.profile_dir, self.job_file_name)
                  def start(self):
                      """Start the controller by profile_dir."""
                      launch = super(WindowsHPCControllerLauncher, self).start
                      return launch(1)
              class WindowsHPCEngineSetLauncher(WindowsHPCLauncher, ClusterAppMixin):
                  job_file_name = Unicode(u'ipengineset_job.xml', config=True,
                      help="jobfile for ipengines job")
                  engine_args = List([], config=False,
                      help="extra args to pas to ipengine")
                  def write_job_file(self, n):
                      """Write a job file containing ``n`` engine tasks."""
                      job = IPEngineSetJob(parent=self)
                      for _ in range(n):
                          task = IPEngineTask(parent=self)
                          # The tasks work directory is *not* the actual work directory of
                          # the engine. It is used as the base path for the stdout/stderr
                          # files that the scheduler redirects to.
                          task.work_directory = self.profile_dir
                          # Append the cluster args, then this launcher's engine args.
                          for extra in (self.cluster_args, self.engine_args):
                              task.engine_args.extend(extra)
                          job.add_task(task)
                      self.log.debug("Writing job description file: %s", self.job_file)
                      job.write(self.job_file)
                  @property
                  def job_file(self):
                      """Path of the job file, located inside the profile directory."""
                      return os.path.join(self.profile_dir, self.job_file_name)
                  def start(self, n):
                      """Start n engines by profile_dir."""
                      launch = super(WindowsHPCEngineSetLauncher, self).start
                      return launch(n)
              #-----------------------------------------------------------------------------
              # Batch (PBS) system launchers
              #-----------------------------------------------------------------------------
              class BatchClusterAppMixin(ClusterAppMixin):
                  """ClusterApp mixin that updates the self.context dict, rather than cl-args."""
                  def _profile_dir_changed(self, name, old, new):
                      """Store the new value in ``self.context`` under the trait's name."""
                      self.context[name] = new
                  # cluster_id changes are mirrored into the context the same way
                  _cluster_id_changed = _profile_dir_changed
                  def _profile_dir_default(self):
                      """Default to an empty profile_dir, also recorded in the context."""
                      empty = ''
                      self.context['profile_dir'] = empty
                      return empty
                  def _cluster_id_default(self):
                      """Default to an empty cluster_id, also recorded in the context."""
                      empty = ''
                      self.context['cluster_id'] = empty
                      return empty
              class BatchSystemLauncher(BaseLauncher):
                  """Launch an external process using a batch system.

                  This class is designed to work with UNIX batch systems like PBS, LSF,
                  GridEngine, etc.  The overall model is that there are different commands
                  like qsub, qdel, etc. that handle the starting and stopping of the process.

                  This class also has the notion of a batch script. The ``batch_template``
                  attribute can be set to a string that is a template for the batch script.
                  This template is instantiated using string formatting. Thus the template can
                  use {n} for the number of instances. Subclasses can add additional variables
                  to the template dict.
                  """
                  # Subclasses must fill these in.  See PBSEngineSet
                  submit_command = List([''], config=True,
                      help="The name of the command line program used to submit jobs.")
                  delete_command = List([''], config=True,
                      help="The name of the command line program used to delete jobs.")
                  job_id_regexp = CRegExp('', config=True,
                      help="""A regular expression used to get the job id from the output of the
                      submit_command.""")
                  job_id_regexp_group = Integer(0, config=True,
                      help="""The group we wish to match in job_id_regexp (0 to match all)""")
                  batch_template = Unicode('', config=True,
                      help="The string that is the batch script template itself.")
                  batch_template_file = Unicode(u'', config=True,
                      help="The file that contains the batch template.")
                  batch_file_name = Unicode(u'batch_script', config=True,
                      help="The filename of the instantiated batch script.")
                  queue = Unicode(u'', config=True,
                      help="The PBS Queue.")

                  def _queue_changed(self, name, old, new):
                      # Mirror the trait into the template context under its own name.
                      self.context[name] = new

                  n = Integer(1)
                  # ``n`` is mirrored into the context the same way as ``queue``.
                  _n_changed = _queue_changed

                  # not configurable, override in subclasses
                  # PBS Job Array regex
                  job_array_regexp = CRegExp('')
                  job_array_template = Unicode('')
                  # PBS Queue regex
                  queue_regexp = CRegExp('')
                  queue_template = Unicode('')
                  # The default batch template, override in subclasses
                  default_template = Unicode('')
                  # The full path to the instantiated batch script.
                  batch_file = Unicode(u'')
                  # the format dict used with batch_template:
                  context = Dict()

                  def _context_default(self):
                      """load the default context with the default values for the basic keys
                      because the _trait_changed methods only load the context if they
                      are set to something other than the default value.
                      """
                      return dict(n=1, queue=u'', profile_dir=u'', cluster_id=u'')

                  # the Formatter instance for rendering the templates:
                  formatter = Instance(EvalFormatter, (), {})

                  def find_args(self):
                      """Return the submit command followed by the batch script path."""
                      return self.submit_command + [self.batch_file]

                  def __init__(self, work_dir=u'.', config=None, **kwargs):
                      super(BatchSystemLauncher, self).__init__(
                          work_dir=work_dir, config=config, **kwargs
                      )
                      # The instantiated script always lives in the work directory.
                      self.batch_file = os.path.join(self.work_dir, self.batch_file_name)

                  def parse_job_id(self, output):
                      """Take the output of the submit command and return the job id.

                      Raises LauncherError when ``job_id_regexp`` does not match ``output``.
                      The id is also stored on ``self.job_id``.
                      """
                      m = self.job_id_regexp.search(output)
                      if m is None:
                          raise LauncherError("Job id couldn't be determined: %s" % output)
                      job_id = m.group(self.job_id_regexp_group)
                      self.job_id = job_id
                      self.log.info('Job submitted with job id: %r', job_id)
                      return job_id

                  def write_batch_script(self, n):
                      """Instantiate and write the batch script to the work_dir."""
                      self.n = n
                      # first priority is batch_template if set
                      if self.batch_template_file and not self.batch_template:
                          # second priority is batch_template_file
                          with open(self.batch_template_file) as f:
                              self.batch_template = f.read()
                      if not self.batch_template:
                          # third (last) priority is default_template
                          self.batch_template = self.default_template
                          # Add queue / job-array lines to the default template.  This is
                          # *only* done when the user did not specify a template.
                          self._insert_queue_in_script()
                          self._insert_job_array_in_script()
                      script_as_string = self.formatter.format(self.batch_template, **self.context)
                      self.log.debug('Writing batch script: %s', self.batch_file)
                      with open(self.batch_file, 'w') as f:
                          f.write(script_as_string)
                      # The script is made readable, writable and executable by its owner only.
                      os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

                  def _insert_queue_in_script(self):
                      """Inserts a queue if required into the batch script.

                      The queue line goes directly after the first line of the template.
                      """
                      if self.queue and not self.queue_regexp.search(self.batch_template):
                          self.log.debug("adding PBS queue settings to batch script")
                          # partition() also copes with a template that has no newline,
                          # where unpacking split('\n', 1) would raise ValueError.
                          firstline, _, rest = self.batch_template.partition('\n')
                          self.batch_template = u'\n'.join([firstline, self.queue_template, rest])

                  def _insert_job_array_in_script(self):
                      """Inserts a job array if required into the batch script.

                      The job array line goes directly after the first line of the template.
                      """
                      if not self.job_array_regexp.search(self.batch_template):
                          self.log.debug("adding job array settings to batch script")
                          # partition() also copes with a template that has no newline.
                          firstline, _, rest = self.batch_template.partition('\n')
                          self.batch_template = u'\n'.join([firstline, self.job_array_template, rest])

                  def start(self, n):
                      """Start n copies of the process using a batch system.

                      Writes the batch script, runs the submit command on it and returns
                      the job id parsed from the command's output.
                      """
                      self.log.debug("Starting %s: %r", self.__class__.__name__, self.args)
                      # Here we save profile_dir in the context so they
                      # can be used in the batch script template as {profile_dir}
                      self.write_batch_script(n)
                      output = check_output(self.args, env=os.environ)
                      output = output.decode(DEFAULT_ENCODING, 'replace')
                      job_id = self.parse_job_id(output)
                      self.notify_start(job_id)
                      return job_id

                  def stop(self):
                      """Run the delete command on the submitted job.

                      This is best-effort: a failure to run the command is logged and an
                      empty output is reported. Returns the decoded stdout + stderr of the
                      delete command.
                      """
                      cmd = self.delete_command + [self.job_id]
                      try:
                          p = Popen(cmd, env=os.environ, stdout=PIPE, stderr=PIPE)
                          out, err = p.communicate()
                          output = out + err
                      except Exception:
                          self.log.exception("Problem stopping cluster with command: %s", cmd)
                          # Must be bytes: it is decoded below, and str has no
                          # .decode() on Python 3.
                          output = b""
                      output = output.decode(DEFAULT_ENCODING, 'replace')
                      self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
                      return output
              class PBSLauncher(BatchSystemLauncher):
                  """A BatchSystemLauncher subclass for PBS."""
                  submit_command = List(['qsub'], config=True,
                      help="The PBS submit command ['qsub']")
                  delete_command = List(['qdel'], config=True,
                      help="The PBS delete command ['qdel']")
                  job_id_regexp = CRegExp(r'\d+', config=True,
                      help=r"Regular expression for identifying the job ID [r'\d+']")
                  batch_file = Unicode(u'')
                  # Raw strings keep the regex escapes from being read as (invalid)
                  # Python string escapes; the pattern text itself is unchanged.
                  job_array_regexp = CRegExp(r'#PBS\W+-t\W+[\w\d\-\$]+')
                  job_array_template = Unicode('#PBS -t 1-{n}')
                  queue_regexp = CRegExp(r'#PBS\W+-q\W+\$?\w+')
                  queue_template = Unicode('#PBS -q {queue}')
              class PBSControllerLauncher(PBSLauncher, BatchClusterAppMixin):
                  """Submit the ipcontroller as a PBS job."""

                  batch_file_name = Unicode(
                      u'pbs_controller',
                      config=True,
                      help="batch file name for the controller job.",
                  )
                  # The shell-quoted ipcontroller command replaces %s when the class is
                  # defined; the {profile_dir} / {cluster_id} fields stay in the template.
                  default_template = Unicode("""#!/bin/sh
              #PBS -V
              #PBS -N ipcontroller
              %s --log-to-file --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
              """ % ' '.join(pipes.quote(arg) for arg in ipcontroller_cmd_argv))

                  def start(self):
                      """Start a single controller through the parent launcher."""
                      launch = super(PBSControllerLauncher, self).start
                      return launch(1)
              class PBSEngineSetLauncher(PBSLauncher, BatchClusterAppMixin):
                  """Submit ipengine processes as a PBS job."""

                  batch_file_name = Unicode(
                      u'pbs_engines',
                      config=True,
                      help="batch file name for the engine(s) job.",
                  )
                  # The shell-quoted ipengine command replaces %s when the class is
                  # defined; the {profile_dir} / {cluster_id} fields stay in the template.
                  default_template = Unicode(u"""#!/bin/sh
              #PBS -V
              #PBS -N ipengine
              %s --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
              """ % ' '.join(pipes.quote(arg) for arg in ipengine_cmd_argv))
              #SGE is very similar to PBS
              class SGELauncher(PBSLauncher):
                  """Sun GridEngine is a PBS clone with slightly different syntax"""
                  # SGE directives start with ``#$`` instead of ``#PBS``.
                  # Raw strings keep the regex escapes from being read as (invalid)
                  # Python string escapes; the pattern text itself is unchanged.
                  job_array_regexp = CRegExp(r'#\$\W+\-t')
                  job_array_template = Unicode('#$ -t 1-{n}')
                  queue_regexp = CRegExp(r'#\$\W+-q\W+\$?\w+')
                  queue_template = Unicode('#$ -q {queue}')
              class SGEControllerLauncher(SGELauncher, BatchClusterAppMixin):
                  """Submit the ipcontroller as an SGE job."""

                  batch_file_name = Unicode(
                      u'sge_controller',
                      config=True,
                      help="batch file name for the ipontroller job.",
                  )
                  # The shell-quoted ipcontroller command replaces %s when the class is
                  # defined; the {profile_dir} / {cluster_id} fields stay in the template.
                  default_template = Unicode(u"""#$ -V
              #$ -S /bin/sh
              #$ -N ipcontroller
              %s --log-to-file --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
              """ % ' '.join(pipes.quote(arg) for arg in ipcontroller_cmd_argv))

                  def start(self):
                      """Start a single controller through the parent launcher."""
                      launch = super(SGEControllerLauncher, self).start
                      return launch(1)
              class SGEEngineSetLauncher(SGELauncher, BatchClusterAppMixin):
                  """Submit ipengine processes as an SGE job."""

                  batch_file_name = Unicode(
                      u'sge_engines',
                      config=True,
                      help="batch file name for the engine(s) job.",
                  )
                  # The shell-quoted ipengine command replaces %s when the class is
                  # defined; the {profile_dir} / {cluster_id} fields stay in the template.
                  default_template = Unicode("""#$ -V
              #$ -S /bin/sh
              #$ -N ipengine
              %s --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
              """ % ' '.join(pipes.quote(arg) for arg in ipengine_cmd_argv))
              # LSF launchers
              class LSFLauncher(BatchSystemLauncher):
                  """A BatchSystemLauncher subclass for LSF."""
                  submit_command = List(['bsub'], config=True,
                                        help="The LSF submit command ['bsub']")
                  delete_command = List(['bkill'], config=True,
                                        help="The LSF delete command ['bkill']")
                  job_id_regexp = CRegExp(r'\d+', config=True,
                                          help=r"Regular expression for identifying the job ID [r'\d+']")
                  batch_file = Unicode(u'')
                  # Raw strings: ``\t`` is now interpreted by the regex engine instead of
                  # by Python, which matches the same text (a TAB) as before.
                  # NOTE(review): this pattern does not look like it can match the
                  # ``job_array_template`` below (no whitespace allowed after -J, and
                  # ``{n}`` is not ``\d+``) -- left unchanged, confirm the intent.
                  job_array_regexp = CRegExp(r'#BSUB[ \t]-J+\w+\[\d+-\d+\]')
                  job_array_template = Unicode('#BSUB -J ipengine[1-{n}]')
                  queue_regexp = CRegExp(r'#BSUB[ \t]+-q[ \t]+\w+')
                  queue_template = Unicode('#BSUB -q {queue}')

                  def start(self, n):
                      """Start n copies of the process using LSF batch system.

                      This can't inherit from the base class because bsub expects
                      to be piped a shell script in order to honor the #BSUB directives :
                      bsub < script

                      Returns the job id parsed from the output of the submit command.
                      """
                      # Here we save profile_dir in the context so they
                      # can be used in the batch script template as {profile_dir}
                      self.write_batch_script(n)
                      args = self.args
                      # The last argument is the batch script; everything before it is the
                      # submit command, which may consist of more than one word.
                      submit, script = args[:-1], args[-1]
                      # The script path is shell-quoted so that spaces or shell
                      # metacharacters in it cannot alter the command line.
                      piped_cmd = '%s < %s' % (' '.join(submit), pipes.quote(script))
                      self.log.debug("Starting %s: %s", self.__class__.__name__, piped_cmd)
                      p = Popen(piped_cmd, shell=True, env=os.environ, stdout=PIPE)
                      output, err = p.communicate()
                      output = output.decode(DEFAULT_ENCODING, 'replace')
                      job_id = self.parse_job_id(output)
                      self.notify_start(job_id)
                      return job_id
              class LSFControllerLauncher(LSFLauncher, BatchClusterAppMixin):
                  """Submit the ipcontroller as an LSF job."""

                  batch_file_name = Unicode(
                      u'lsf_controller',
                      config=True,
                      help="batch file name for the controller job.",
                  )
                  # The shell-quoted ipcontroller command replaces %s when the class is
                  # defined (%%J becomes a literal %J); the {profile_dir} / {cluster_id}
                  # fields stay in the template.
                  default_template = Unicode("""#!/bin/sh
                  #BSUB -J ipcontroller
                  #BSUB -oo ipcontroller.o.%%J
                  #BSUB -eo ipcontroller.e.%%J
                  %s --log-to-file --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
                  """ % ' '.join(pipes.quote(arg) for arg in ipcontroller_cmd_argv))

                  def start(self):
                      """Start a single controller through the parent launcher."""
                      launch = super(LSFControllerLauncher, self).start
                      return launch(1)
              class LSFEngineSetLauncher(LSFLauncher, BatchClusterAppMixin):
                  """Submit ipengine processes as an LSF job."""

                  batch_file_name = Unicode(
                      u'lsf_engines',
                      config=True,
                      help="batch file name for the engine(s) job.",
                  )
                  # The shell-quoted ipengine command replaces %s when the class is
                  # defined (%%J becomes a literal %J); the {profile_dir} / {cluster_id}
                  # fields stay in the template.
                  default_template = Unicode(u"""#!/bin/sh
                  #BSUB -oo ipengine.o.%%J
                  #BSUB -eo ipengine.e.%%J
                  %s --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
                  """ % ' '.join(pipes.quote(arg) for arg in ipengine_cmd_argv))
              class HTCondorLauncher(BatchSystemLauncher):
                  """A BatchSystemLauncher subclass for HTCondor.

                  HTCondor requires that we launch the ipengine/ipcontroller scripts rather
                  than the python instance but otherwise is very similar to PBS.  This is because
                  HTCondor destroys sys.executable when launching remote processes - a launched
                  python process depends on sys.executable to effectively evaluate its
                  module search paths. Without it, regardless of which python interpreter you launch
                  you will get only the built-in module search paths.

                  We use the ip{cluster, engine, controller} scripts as our executable to circumvent
                  this - the mechanism of shebanged scripts means that the python binary will be
                  launched with argv[0] set to the *location of the ip{cluster, engine, controller}
                  scripts on the remote node*. This means you need to take care that:

                  a. Your remote nodes have their paths configured correctly, with the ipengine and ipcontroller
                     of the python environment you wish to execute code in having top precedence.
                  b. This functionality is untested on Windows.

                  If you need different behavior, consider making your own template.
                  """
                  submit_command = List(['condor_submit'], config=True,
                      help="The HTCondor submit command ['condor_submit']")
                  delete_command = List(['condor_rm'], config=True,
                      help="The HTCondor delete command ['condor_rm']")
                  # Raw strings keep the regex escapes from being read as (invalid)
                  # Python string escapes; the resulting text is unchanged.
                  job_id_regexp = CRegExp(r'(\d+)\.$', config=True,
                      help=r"Regular expression for identifying the job ID [r'(\d+)\.$']")
                  job_id_regexp_group = Integer(1, config=True,
                      help="""The group we wish to match in job_id_regexp [1]""")
                  job_array_regexp = CRegExp(r'queue\W+\$')
                  job_array_template = Unicode('queue {n}')

                  def _insert_job_array_in_script(self):
                      """Inserts a job array if required into the batch script.

                      Unlike the base class, the job array line is appended at the end.
                      """
                      if not self.job_array_regexp.search(self.batch_template):
                          self.log.debug("adding job array settings to batch script")
                          #HTCondor requires that the job array goes at the bottom of the script
                          self.batch_template = '\n'.join([self.batch_template,
                              self.job_array_template])

                  def _insert_queue_in_script(self):
                      """AFAIK, HTCondor doesn't have a concept of multiple queues that can be
                      specified in the script.
                      """
                      pass
              class HTCondorControllerLauncher(HTCondorLauncher, BatchClusterAppMixin):
                  """Submit the ipcontroller as an HTCondor job."""

                  batch_file_name = Unicode(
                      u'htcondor_controller',
                      config=True,
                      help="batch file name for the controller job.",
                  )
                  # NOTE(review): the ``arguments`` line here is quoted differently from
                  # the one in HTCondorEngineSetLauncher -- confirm this is intentional.
                  default_template = Unicode(r"""
              universe        = vanilla
              executable      = ipcontroller
              # by default we expect a shared file system
              transfer_executable = False
              arguments       = --log-to-file '--profile-dir={profile_dir}' --cluster-id='{cluster_id}'
              """)

                  def start(self):
                      """Start a single controller through the parent launcher."""
                      launch = super(HTCondorControllerLauncher, self).start
                      return launch(1)
              class HTCondorEngineSetLauncher(HTCondorLauncher, BatchClusterAppMixin):
                  """Submit ipengine processes as an HTCondor job."""

                  batch_file_name = Unicode(
                      u'htcondor_engines',
                      config=True,
                      help="batch file name for the engine(s) job.",
                  )
                  # Submit description for the engines; {profile_dir} and {cluster_id}
                  # are fields of the batch template.
                  default_template = Unicode("""
              universe        = vanilla
              executable      = ipengine
              # by default we expect a shared file system
              transfer_executable = False
              arguments       = "--log-to-file '--profile-dir={profile_dir}' '--cluster-id={cluster_id}'"
              """)
              #-----------------------------------------------------------------------------
              # A launcher for ipcluster itself!
              #-----------------------------------------------------------------------------
              class IPClusterLauncher(LocalProcessLauncher):
                  """Run the ipcluster program as an external process."""

                  ipcluster_cmd = List(
                      ipcluster_cmd_argv,
                      config=True,
                      help="Popen command for ipcluster",
                  )
                  ipcluster_args = List(
                      ['--clean-logs=True', '--log-to-file', '--log-level=%i' % logging.INFO],
                      config=True,
                      help="Command line arguments to pass to ipcluster.",
                  )
                  ipcluster_subcommand = Unicode('start')
                  profile = Unicode('default')
                  n = Integer(2)

                  def find_args(self):
                      """Build the argv: command, subcommand, --n/--profile, extra args."""
                      argv = list(self.ipcluster_cmd)
                      argv.append(self.ipcluster_subcommand)
                      argv.extend(['--n=%i' % self.n, '--profile=%s' % self.profile])
                      argv.extend(self.ipcluster_args)
                      return argv

                  def start(self):
                      """Start ipcluster by deferring to the parent launcher's ``start``."""
                      parent_start = super(IPClusterLauncher, self).start
                      return parent_start()
              #-----------------------------------------------------------------------------
              # Collections of launchers
              #-----------------------------------------------------------------------------
              # Launcher classes grouped by the backend they target.
              local_launchers = [LocalControllerLauncher, LocalEngineLauncher, LocalEngineSetLauncher]
              mpi_launchers = [MPILauncher, MPIControllerLauncher, MPIEngineSetLauncher]
              ssh_launchers = [
                  SSHLauncher, SSHControllerLauncher, SSHEngineLauncher,
                  SSHEngineSetLauncher, SSHProxyEngineSetLauncher,
              ]
              winhpc_launchers = [
                  WindowsHPCLauncher, WindowsHPCControllerLauncher, WindowsHPCEngineSetLauncher,
              ]
              pbs_launchers = [PBSLauncher, PBSControllerLauncher, PBSEngineSetLauncher]
              sge_launchers = [SGELauncher, SGEControllerLauncher, SGEEngineSetLauncher]
              lsf_launchers = [LSFLauncher, LSFControllerLauncher, LSFEngineSetLauncher]
              htcondor_launchers = [
                  HTCondorLauncher, HTCondorControllerLauncher, HTCondorEngineSetLauncher,
              ]
              # Every launcher class, in the same group order as above.
              all_launchers = (
                  local_launchers + mpi_launchers + ssh_launchers + winhpc_launchers
                  + pbs_launchers + sge_launchers + lsf_launchers + htcondor_launchers
              )

IPython/parallel/controller/hub.py

0 +17 -28

              """The IPython Controller Hub with 0MQ
              This is the master object that handles connections from engines and clients,
              and monitors traffic through the various queues.
              """
              # Copyright (c) IPython Development Team.
              # Distributed under the terms of the Modified BSD License.
              from __future__ import print_function
              import json
              import os
              import sys
              import time
              from datetime import datetime
              import zmq
-             from zmq.eventloop import ioloop
              from zmq.eventloop.zmqstream import ZMQStream
              # internal:
              from IPython.utils.importstring import import_item
              from IPython.utils.jsonutil import extract_dates
              from IPython.utils.localinterfaces import localhost
              from IPython.utils.py3compat import cast_bytes, unicode_type, iteritems
              from IPython.utils.traitlets import (
-                     HasTraits, Instance, Integer, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
+                     HasTraits, Any, Instance, Integer, Unicode, Dict, Set, Tuple, DottedObjectName
                      )
              from IPython.parallel import error, util
              from IPython.parallel.factory import RegistrationFactory
              from IPython.kernel.zmq.session import SessionFactory
              from .heartmonitor import HeartMonitor
-             #-----------------------------------------------------------------------------
-             # Code
-             #-----------------------------------------------------------------------------
              def _passer(*args, **kwargs):
                  """Accept any arguments, do nothing and return None."""
                  return None
              def _printer(*args, **kwargs):
                  """Print the positional args tuple, then the keyword args dict."""
                  for received in (args, kwargs):
                      print(received)
              def empty_record():
                  """Build a fresh record dict containing every record key.

                  All keys map to None, except 'stdout' and 'stderr' which are empty strings.
                  """
                  none_keys = (
                      'msg_id', 'header', 'metadata', 'content', 'buffers', 'submitted',
                      'client_uuid', 'engine_uuid', 'started', 'completed', 'resubmitted',
                      'received', 'result_header', 'result_metadata', 'result_content',
                      'result_buffers', 'queue', 'execute_input', 'execute_result', 'error',
                  )
                  record = dict.fromkeys(none_keys)
                  # The output streams start out as empty text rather than None.
                  record['stdout'] = ''
                  record['stderr'] = ''
                  return record
              def init_record(msg):
                  """Create the initial TaskRecord dict for a request message.

                  The header, content, metadata and buffers are taken from ``msg``; the
                  submission time is the header's 'date'. All remaining fields start as
                  None, except 'stdout' and 'stderr' which start as empty strings.
                  """
                  header = msg['header']
                  # Fields copied from the request itself.
                  record = {
                      'msg_id': header['msg_id'],
                      'header': header,
                      'content': msg['content'],
                      'metadata': msg['metadata'],
                      'buffers': msg['buffers'],
                      'submitted': header['date'],
                  }
                  # Fields that are not known yet.
                  pending = (
                      'client_uuid', 'engine_uuid', 'started', 'completed', 'resubmitted',
                      'received', 'result_header', 'result_metadata', 'result_content',
                      'result_buffers', 'queue', 'execute_input', 'execute_result', 'error',
                  )
                  for key in pending:
                      record[key] = None
                  record['stdout'] = ''
                  record['stderr'] = ''
                  return record
              class EngineConnector(HasTraits):
                  """A simple object for accessing the various zmq connections of an object.
                  Attributes are:
                  id (int): engine ID
                  uuid (unicode): engine UUID
                  pending: set of msg_ids
-                 stallback: DelayedCallback for stalled registration
+                 stallback: tornado timeout for stalled registration
                  """
                  # Trait declarations backing the attributes listed in the docstring.
                  # NOTE(review): lines prefixed with '-' / '+' are the removed / added
                  # sides of the commit diff this listing was taken from.
                  id = Integer(0)
                  uuid = Unicode()
                  pending = Set()
-                 stallback = Instance(ioloop.DelayedCallback)
+                 stallback = Any()
              # Short names mapped to the dotted import path of a DB backend class.
              _db_shortcuts = dict(
                  sqlitedb='IPython.parallel.controller.sqlitedb.SQLiteDB',
                  mongodb='IPython.parallel.controller.mongodb.MongoDB',
                  dictdb='IPython.parallel.controller.dictdb.DictDB',
                  nodb='IPython.parallel.controller.dictdb.NoDB',
              )
              class HubFactory(RegistrationFactory):
                  """The Configurable for setting up a Hub."""
                  # port-pairs for monitoredqueues:
                  hb = Tuple(Integer,Integer,config=True,
                      help="""PUB/ROUTER Port pair for Engine heartbeats""")
                  def _hb_default(self):
                      """Default ``hb`` port pair, taken from util.select_random_ports(2)."""
                      port_pair = util.select_random_ports(2)
                      return tuple(port_pair)
                  mux = Tuple(Integer,Integer,config=True,
                      help="""Client/Engine Port pair for MUX queue""")
                  def _mux_default(self):
                      """Default ``mux`` port pair, taken from util.select_random_ports(2)."""
                      port_pair = util.select_random_ports(2)
                      return tuple(port_pair)
                  task = Tuple(Integer,Integer,config=True,
                      help="""Client/Engine Port pair for Task queue""")
                  def _task_default(self):
                      """Default ``task`` port pair, taken from util.select_random_ports(2)."""
                      port_pair = util.select_random_ports(2)
                      return tuple(port_pair)
                  control = Tuple(Integer,Integer,config=True,
                      help="""Client/Engine Port pair for Control queue""")
                  def _control_default(self):
                      """Default ``control`` port pair, taken from util.select_random_ports(2)."""
                      port_pair = util.select_random_ports(2)
                      return tuple(port_pair)
                  iopub = Tuple(Integer,Integer,config=True,
                      help="""Client/Engine Port pair for IOPub relay""")
                  def _iopub_default(self):
                      """Default ``iopub`` port pair, taken from util.select_random_ports(2)."""
                      port_pair = util.select_random_ports(2)
                      return tuple(port_pair)
                  # single ports:
                  mon_port = Integer(config=True,
                      help="""Monitor (SUB) port for queue traffic""")
                  def _mon_port_default(self):
                      """Default ``mon_port``: first port from util.select_random_ports(1)."""
                      ports = util.select_random_ports(1)
                      return ports[0]
                  notifier_port = Integer(config=True,
                      help="""PUB port for sending engine status notifications""")
                  def _notifier_port_default(self):
                      """Default ``notifier_port``: first port from util.select_random_ports(1)."""
                      ports = util.select_random_ports(1)
                      return ports[0]
                  engine_ip = Unicode(config=True,
                      help="IP on which to listen for engine connections. [default: loopback]")
                  def _engine_ip_default(self):
                      """Default IP: whatever ``localhost()`` returns."""
                      default_ip = localhost()
                      return default_ip
                  engine_transport = Unicode('tcp', config=True,
                      help="0MQ transport for engine connections. [default: tcp]")
                  client_ip = Unicode(config=True,
                      help="IP on which to listen for client connections. [default: loopback]")
                  client_transport = Unicode('tcp', config=True,
                      help="0MQ transport for client connections. [default : tcp]")
                  monitor_ip = Unicode(config=True,
                      help="IP on which to listen for monitor messages. [default: loopback]")
                  monitor_transport = Unicode('tcp', config=True,
                      help="0MQ transport for monitor messages. [default : tcp]")
                  _client_ip_default = _monitor_ip_default = _engine_ip_default
                  monitor_url = Unicode('')
                  db_class = DottedObjectName('NoDB',
                      config=True, help="""The class to use for the DB backend
                      Options include:
                      SQLiteDB: SQLite
                      MongoDB : use MongoDB
                      DictDB  : in-memory storage (fastest, but be mindful of memory growth of the Hub)
                      NoDB    : disable database altogether (default)
                      """)
                  registration_timeout = Integer(0, config=True,
                      help="Engine registration timeout in seconds [default: max(30,"
                           "10*heartmonitor.period)]" )
                  def _registration_timeout_default(self):
                      """Default timeout: max(30, int(.01 * heartmonitor.period)).

                      Returns 0 while no heartmonitor is set.
                      """
                      monitor = self.heartmonitor
                      if monitor is None:
                          # early initialization, this value will be ignored
                          return 0
                      # heartmonitor period is in milliseconds, so 10x in seconds is .01
                      ten_periods = int(.01 * monitor.period)
                      return max(30, ten_periods)
                  # not configurable
                  # Both are assigned inside init_hub(); _registration_timeout_default()
                  # tolerates heartmonitor still being None before that point.
                  db = Instance('IPython.parallel.controller.dictdb.BaseDB')
                  heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
                  def _ip_changed(self, name, old, new):
                      """Copy ``new`` onto the engine, client and monitor IPs, then
                      rebuild ``monitor_url``.  ``name`` and ``old`` are unused."""
                      for trait in ('engine_ip', 'client_ip', 'monitor_ip'):
                          setattr(self, trait, new)
                      self._update_monitor_url()
                  def _update_monitor_url(self):
                      """Rebuild ``monitor_url`` from the monitor transport, IP and ``mon_port``."""
                      pieces = (self.monitor_transport, self.monitor_ip, self.mon_port)
                      self.monitor_url = "%s://%s:%i" % pieces
                  def _transport_changed(self, name, old, new):
                      """Copy ``new`` onto the engine, client and monitor transports, then
                      rebuild ``monitor_url``.  ``name`` and ``old`` are unused."""
                      for trait in ('engine_transport', 'client_transport', 'monitor_transport'):
                          setattr(self, trait, new)
                      self._update_monitor_url()
                  def __init__(self, **kwargs):
                      """Initialise the factory from ``kwargs`` and derive ``monitor_url``."""
                      super(HubFactory, self).__init__(**kwargs)
                      # monitor_url is derived from other attributes, so compute it once
                      # after the parent initialiser has run.
                      self._update_monitor_url()
                  def construct(self):
                      """Build the Hub by delegating to ``init_hub``; returns nothing."""
                      build = self.init_hub
                      build()
                  def start(self):
                      """Start the heart monitor and log that it has been started."""
                      monitor = self.heartmonitor
                      monitor.start()
                      self.log.info("Heartmonitor started")
                  def client_url(self, channel):
                      """Return the full zmq URL of the client channel named ``channel``.
                      The port is looked up in ``client_info``; an unknown name raises KeyError.
                      """
                      transport, ip = self.client_transport, self.client_ip
                      port = self.client_info[channel]
                      return "%s://%s:%i" % (transport, ip, port)
                  def engine_url(self, channel):
                      """Return the full zmq URL of the engine channel named ``channel``.
                      The port is looked up in ``engine_info``; an unknown name raises KeyError.
                      """
                      transport, ip = self.engine_transport, self.engine_ip
                      port = self.engine_info[channel]
                      return "%s://%s:%i" % (transport, ip, port)
                  def init_hub(self):
                      """construct Hub object
                      Builds the engine/client connection dicts, binds the registration,
                      heartbeat, notification and monitor sockets, instantiates the DB
                      backend and the HeartMonitor, and stores the resulting Hub on
                      ``self.hub``.
                      """
                      ctx = self.context
                      loop = self.loop
                      # Prefer an explicitly configured task scheme; otherwise use the
                      # TaskScheduler trait's default value.
                      if 'TaskScheduler.scheme_name' in self.config:
                          scheme = self.config.TaskScheduler.scheme_name
                      else:
                          from .scheduler import TaskScheduler
                          scheme = TaskScheduler.scheme_name.get_default_value()
                      # build connection dicts
                      # For control/mux/task/iopub, index 1 goes to engines and index 0 to
                      # clients; hb[0] is the ping port and hb[1] the pong port.
                      engine = self.engine_info = {
                          'interface'     : "%s://%s" % (self.engine_transport, self.engine_ip),
                          'registration'  : self.regport,
                          'control'       : self.control[1],
                          'mux'           : self.mux[1],
                          'hb_ping'       : self.hb[0],
                          'hb_pong'       : self.hb[1],
                          'task'          : self.task[1],
                          'iopub'         : self.iopub[1],
                          }
                      client = self.client_info = {
                          'interface'     : "%s://%s" % (self.client_transport, self.client_ip),
                          'registration'  : self.regport,
                          'control'       : self.control[0],
                          'mux'           : self.mux[0],
                          'task'          : self.task[0],
                          'task_scheme'   : scheme,
                          'iopub'         : self.iopub[0],
                          'notification'  : self.notifier_port,
                          }
                      self.log.debug("Hub engine addrs: %s", self.engine_info)
                      self.log.debug("Hub client addrs: %s", self.client_info)
                      # Registrar socket
                      q = ZMQStream(ctx.socket(zmq.ROUTER), loop)
                      util.set_hwm(q, 0)
                      q.bind(self.client_url('registration'))
                      self.log.info("Hub listening on %s for registration.", self.client_url('registration'))
                      # Bind a second time only when engines use a different interface.
                      if self.client_ip != self.engine_ip:
                          q.bind(self.engine_url('registration'))
                          self.log.info("Hub listening on %s for registration.", self.engine_url('registration'))
                      ### Engine connections ###
                      # heartbeat
                      hpub = ctx.socket(zmq.PUB)
                      hpub.bind(self.engine_url('hb_ping'))
                      hrep = ctx.socket(zmq.ROUTER)
                      util.set_hwm(hrep, 0)
                      hrep.bind(self.engine_url('hb_pong'))
                      self.heartmonitor = HeartMonitor(loop=loop, parent=self, log=self.log,
                                              pingstream=ZMQStream(hpub,loop),
                                              pongstream=ZMQStream(hrep,loop)
                                          )
                      ### Client connections ###
                      # Notifier socket
                      n = ZMQStream(ctx.socket(zmq.PUB), loop)
                      n.bind(self.client_url('notification'))
                      ### build and launch the queues ###
                      # monitor socket
                      # Subscribes to every topic and listens on both monitor_url and an
                      # in-process endpoint.
                      sub = ctx.socket(zmq.SUB)
                      sub.setsockopt(zmq.SUBSCRIBE, b"")
                      sub.bind(self.monitor_url)
                      sub.bind('inproc://monitor')
                      sub = ZMQStream(sub, loop)
                      # connect the db
                      # Short names are resolved via _db_shortcuts; anything else is used
                      # as given and imported as a dotted name.
                      db_class = _db_shortcuts.get(self.db_class.lower(), self.db_class)
                      self.log.info('Hub using DB backend: %r', (db_class.split('.')[-1]))
                      self.db = import_item(str(db_class))(session=self.session.session,
                                                          parent=self, log=self.log)
                      # NOTE(review): the reason for this fixed pause is not visible here.
                      time.sleep(.25)
                      # resubmit stream
                      r = ZMQStream(ctx.socket(zmq.DEALER), loop)
                      url = util.disambiguate_url(self.client_url('task'))
                      r.connect(url)
-                     # convert seconds to msec
-                     registration_timeout = 1000*self.registration_timeout
                      self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
                              query=q, notifier=n, resubmit=r, db=self.db,
                              engine_info=self.engine_info, client_info=self.client_info,
-                             log=self.log, registration_timeout=registration_timeout)
+                             log=self.log, registration_timeout=self.registration_timeout)
              class Hub(SessionFactory):
                  """The IPython Controller Hub with 0MQ connections
                  Parameters
                  ==========
                  loop: zmq IOLoop instance
                  session: Session object
                  <removed> context: zmq context for creating new connections (?)
                  queue: ZMQStream for monitoring the command queue (SUB)
                  query: ZMQStream for engine registration and client queries requests (ROUTER)
                  heartbeat: HeartMonitor object checking the pulse of the engines
                  notifier: ZMQStream for broadcasting engine registration changes (PUB)
                  db: connection to db for out of memory logging of commands
                              NotImplemented
                  engine_info: dict of zmq connection information for engines to connect
                              to the queues.
                  client_info: dict of zmq connection information for clients to connect
                              to the queues.
                  """
                  engine_state_file = Unicode()
                  # internal data structures:
                  ids=Set() # engine IDs
                  keytable=Dict() # indexed by engine id; values are tested against dead_engines
                  by_ident=Dict() # raw identity (bytes) -> engine id
                  engines=Dict() # engine id -> object exposing a .uuid attribute
                  clients=Dict()
                  hearts=Dict() # heart identity -> engine id
                  pending=Set() # msg_ids submitted and not yet completed
                  queues=Dict()  # pending msg_ids keyed by engine_id
                  tasks=Dict() # msg_ids of tasks that arrived on each engine, keyed by engine_id
                  completed=Dict() # completed msg_ids keyed by engine_id
                  all_completed=Set() # every msg_id that has completed, regardless of engine
                  dead_engines=Set() # engines treated as dead (holds keytable values; presumably uuids)
                  unassigned=Set() # set of task msg_ids not yet assigned a destination
                  incoming_registrations=Dict() # keyed by heart; consulted in handle_new_heart
                  registration_timeout=Integer()
                  _idcounter=Integer(0) # next engine id handed out by _next_id
                  # objects from constructor:
                  query=Instance(ZMQStream)
                  monitor=Instance(ZMQStream)
                  notifier=Instance(ZMQStream)
                  resubmit=Instance(ZMQStream)
                  heartmonitor=Instance(HeartMonitor)
                  db=Instance(object)
                  client_info=Dict()
                  engine_info=Dict()
                  def __init__(self, **kwargs):
                      """Create the Hub and wire its streams to their handlers.
                      Keyword arguments are forwarded to the parent initialiser.  The
                      attributes used here are:
                      query: ZMQStream for registration and client queries
                      monitor: ZMQStream carrying monitored queue/task/iopub traffic
                      heartmonitor: HeartMonitor reporting new and failed hearts
                      resubmit: ZMQStream whose replies are ignored
                      """
                      super(Hub, self).__init__(**kwargs)
                      # register our callbacks
                      self.query.on_recv(self.dispatch_query)
                      self.monitor.on_recv(self.dispatch_monitor_traffic)
                      hm = self.heartmonitor
                      hm.add_heart_failure_handler(self.handle_heart_failure)
                      hm.add_new_heart_handler(self.handle_new_heart)
                      # monitor topic (bytes) -> handler(idents, msg)
                      self.monitor_handlers = dict([
                          (b'in', self.save_queue_request),
                          (b'out', self.save_queue_result),
                          (b'intask', self.save_task_request),
                          (b'outtask', self.save_task_result),
                          (b'tracktask', self.save_task_destination),
                          (b'incontrol', _passer),
                          (b'outcontrol', _passer),
                          (b'iopub', self.save_iopub_message),
                      ])
                      # query msg_type -> handler(idents, msg)
                      self.query_handlers = dict(
                          queue_request=self.queue_status,
                          result_request=self.get_results,
                          history_request=self.get_history,
                          db_request=self.db_query,
                          purge_request=self.purge_results,
                          load_request=self.check_load,
                          resubmit_request=self.resubmit_task,
                          shutdown_request=self.shutdown_request,
                          registration_request=self.register_engine,
                          unregistration_request=self.unregister_engine,
                          connection_request=self.connection_request,
                      )
                      # ignore resubmit replies
                      self.resubmit.on_recv(lambda msg: None, copy=False)
                      self.log.info("hub::created hub")
                  @property
                  def _next_id(self):
                      """Generate a new ID.
                      IDs are never reused: each access returns the current counter value
                      and advances the counter by one, starting from 0."""
                      issued = self._idcounter
                      self._idcounter = issued + 1
                      return issued
                  #-----------------------------------------------------------------------------
                  # message validation
                  #-----------------------------------------------------------------------------
                  def _validate_targets(self, targets):
                      """Normalise a targets argument into engine ids.
                      ``None`` selects every registered engine and returns ``self.ids``
                      itself.  A single int or string is treated as a one-element list,
                      and string targets are mapped through ``by_ident`` when known.
                      Raises IndexError if any target is not a registered id, or if the
                      resulting list is empty.
                      """
                      if targets is None:
                          # default to all
                          return self.ids
                      if isinstance(targets, (int,str,unicode_type)):
                          # only one target specified
                          targets = [targets]
                      resolved = []
                      for target in targets:
                          # map raw identities to ids
                          if isinstance(target, (str,unicode_type)):
                              target = self.by_ident.get(cast_bytes(target), target)
                          resolved.append(target)
                      unknown = [ t for t in resolved if t not in self.ids ]
                      if unknown:
                          raise IndexError("No Such Engine: %r" % unknown)
                      if not resolved:
                          raise IndexError("No Engines Registered")
                      return resolved
                  #-----------------------------------------------------------------------------
                  # dispatch methods (1 per stream)
                  #-----------------------------------------------------------------------------
                  @util.log_errors
                  def dispatch_monitor_traffic(self, msg):
                      """Dispatch one monitor-stream message to its topic handler.
                      All ME and Task queue messages come through here, as well as IOPub
                      traffic.  ``msg[0]`` is the topic; the remaining frames are split
                      into identities and message by the session.  Messages without
                      identities or with an unknown topic are logged and dropped.
                      """
                      self.log.debug("monitor traffic: %r", msg[0])
                      topic = msg[0]
                      try:
                          idents, msg = self.session.feed_identities(msg[1:])
                      except ValueError:
                          idents=[]
                      if not idents:
                          self.log.error("Monitor message without topic: %r", msg)
                          return
                      handler = self.monitor_handlers.get(topic, None)
                      if handler is None:
                          self.log.error("Unrecognized monitor topic: %r", topic)
                          return
                      handler(idents, msg)
                  @util.log_errors
                  def dispatch_query(self, msg):
                      """Route registration requests and queries from clients.
                      The first identity frame is taken as the requesting client.  A
                      message that cannot be unserialized, or whose msg_type has no entry
                      in ``query_handlers``, is logged and answered with a ``hub_error``
                      reply; otherwise the matching handler is called with
                      ``(idents, msg)``.
                      """
                      try:
                          idents, msg = self.session.feed_identities(msg)
                      except ValueError:
                          idents = []
                      if not idents:
                          self.log.error("Bad Query Message: %r", msg)
                          return
                      client_id = idents[0]
                      try:
                          msg = self.session.unserialize(msg, content=True)
                      except Exception:
                          content = error.wrap_exception()
                          self.log.error("Bad Query Message: %r", msg, exc_info=True)
                          self.session.send(self.query, "hub_error", ident=client_id,
                                  content=content)
                          return
                      # switch on message type:
                      msg_type = msg['header']['msg_type']
                      self.log.info("client::client %r requested %r", client_id, msg_type)
                      handler = self.query_handlers.get(msg_type, None)
                      if handler is None:
                          # An explicit check instead of `assert`, so the validation is not
                          # stripped under `python -O`.  The AssertionError is still raised
                          # and caught because error.wrap_exception() is called from inside
                          # the except block, as before.
                          try:
                              raise AssertionError("Bad Message Type: %r" % msg_type)
                          except AssertionError:
                              content = error.wrap_exception()
                              self.log.error("Bad Message Type: %r", msg_type, exc_info=True)
                              self.session.send(self.query, "hub_error", ident=client_id,
                                      content=content)
                          return
                      handler(idents, msg)
                  def dispatch_db(self, msg):
                      """Placeholder for DB message dispatch; always raises NotImplementedError."""
                      raise NotImplementedError
                  #---------------------------------------------------------------------------
                  # handler methods (1 per event)
                  #---------------------------------------------------------------------------
                  #----------------------- Heartbeat --------------------------------------
                  def handle_new_heart(self, heart):
                      """Handler attached to the heartbeater for hearts that start beating.
                      If ``heart`` has a pending entry in ``incoming_registrations`` its
                      registration is finished; any other heart is logged and ignored."""
                      self.log.debug("heartbeat::handle_new_heart(%r)", heart)
                      if heart in self.incoming_registrations:
                          self.finish_registration(heart)
                          return
                      self.log.info("heartbeat::ignoring new heart: %r", heart)
                  def handle_heart_failure(self, heart):
                      """Handler attached to the heartbeater for hearts that stop responding.
                      Called when a previously registered heart fails to answer a beat
                      request.  Hearts that do not map to an engine, or whose engine is
                      already listed in ``dead_engines``, are logged and ignored; any
                      other failure triggers unregistration of the engine."""
                      self.log.debug("heartbeat::handle_heart_failure(%r)", heart)
                      eid = self.hearts.get(heart, None)
                      if eid is None or self.keytable[eid] in self.dead_engines:
                          self.log.info("heartbeat::ignoring heart failure %r (not an engine or already dead)", heart)
                          return
                      # Look the engine up only after eid is known to be valid; doing it
                      # first raised KeyError for hearts that are not engines.
                      uuid = self.engines[eid].uuid
                      self.unregister_engine(heart, dict(content=dict(id=eid, queue=uuid)))
                  #----------------------- MUX Queue Traffic ------------------------------
                  def save_queue_request(self, idents, msg):
                      """Record a MUX request observed on the monitor stream.
                      ``idents`` must start with ``(queue_id, client_id)``.  The request
                      is turned into a DB record, merged with any record that already
                      exists for the same msg_id, and the msg_id is added to ``pending``
                      and to the target engine's entry in ``queues``.  Malformed input
                      and unregistered targets are logged and dropped.
                      """
                      if len(idents) < 2:
                          self.log.error("invalid identity prefix: %r", idents)
                          return
                      queue_id, client_id = idents[:2]
                      try:
                          msg = self.session.unserialize(msg)
                      except Exception:
                          self.log.error("queue::client %r sent invalid message to %r: %r", client_id, queue_id, msg, exc_info=True)
                          return
                      eid = self.by_ident.get(queue_id, None)
                      if eid is None:
                          self.log.error("queue::target %r not registered", queue_id)
                          self.log.debug("queue::    valid are: %r", self.by_ident.keys())
                          return
                      record = init_record(msg)
                      msg_id = record['msg_id']
                      self.log.info("queue::client %r submitted request %r to %s", client_id, msg_id, eid)
                      # Unicode in records
                      record['engine_uuid'] = queue_id.decode('ascii')
                      record['client_uuid'] = msg['header']['session']
                      record['queue'] = 'mux'
                      try:
                          # it's possible iopub arrived first:
                          existing = self.db.get_record(msg_id)
                          # Keep values already stored when the new record has none; warn
                          # when both sides have different truthy values.
                          for key,evalue in iteritems(existing):
                              rvalue = record.get(key, None)
                              if evalue and rvalue and evalue != rvalue:
                                  self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, rvalue, key, evalue)
                              elif evalue and not rvalue:
                                  record[key] = evalue
                          try:
                              self.db.update_record(msg_id, record)
                          except Exception:
                              self.log.error("DB Error updating record %r", msg_id, exc_info=True)
                      except KeyError:
                          # A KeyError raised anywhere in the block above (including
                          # get_record) lands here, and the record is added as new.
                          try:
                              self.db.add_record(msg_id, record)
                          except Exception:
                              self.log.error("DB Error adding record %r", msg_id, exc_info=True)
                      self.pending.add(msg_id)
                      self.queues[eid].append(msg_id)
                  def save_queue_result(self, idents, msg):
                      """Record the reply to a MUX request observed on the monitor stream.
                      ``idents`` must start with ``(client_id, queue_id)``.  A reply for a
                      pending msg_id moves it from ``pending``/``queues`` to
                      ``all_completed``/``completed``.  A reply for a msg_id that is
                      neither pending nor already completed is logged and dropped.  In
                      every other case the result fields are written to the DB record.
                      """
                      if len(idents) < 2:
                          self.log.error("invalid identity prefix: %r", idents)
                          return
                      client_id, queue_id = idents[:2]
                      try:
                          msg = self.session.unserialize(msg)
                      except Exception:
                          self.log.error("queue::engine %r sent invalid message to %r: %r",
                                  queue_id, client_id, msg, exc_info=True)
                          return
                      eid = self.by_ident.get(queue_id, None)
                      if eid is None:
                          self.log.error("queue::unknown engine %r is sending a reply: ", queue_id)
                          return
                      parent = msg['parent_header']
                      if not parent:
                          return
                      msg_id = parent['msg_id']
                      was_pending = msg_id in self.pending
                      if not was_pending and msg_id not in self.all_completed:
                          # it could be a result from a dead engine that died before
                          # delivering the result
                          self.log.warn("queue:: unknown msg finished %r", msg_id)
                          return
                      if was_pending:
                          self.pending.remove(msg_id)
                          self.all_completed.add(msg_id)
                          self.queues[eid].remove(msg_id)
                          self.completed[eid].append(msg_id)
                          self.log.info("queue::request %r completed on %s", msg_id, eid)
                      # update record anyway, because the unregistration could have been premature
                      reply_header = msg['header']
                      metadata = msg['metadata']
                      completed = reply_header['date']
                      started = extract_dates(metadata.get('started', None))
                      result = dict(
                          result_header=reply_header,
                          result_metadata=metadata,
                          result_content=msg['content'],
                          received=datetime.now(),
                          started=started,
                          completed=completed,
                          result_buffers=msg['buffers'],
                      )
                      try:
                          self.db.update_record(msg_id, result)
                      except Exception:
                          self.log.error("DB Error updating record %r", msg_id, exc_info=True)
                  #--------------------- Task Queue Traffic ------------------------------
                  def save_task_request(self, idents, msg):
                      """Save the submission of a task.
                      The msg_id is added to ``pending`` and ``unassigned`` and a DB
                      record is created, or merged into an existing record for the same
                      msg_id.  Messages that cannot be unserialized are logged and
                      dropped; DB errors are logged, not raised.
                      """
                      client_id = idents[0]
                      try:
                          msg = self.session.unserialize(msg)
                      except Exception:
                          self.log.error("task::client %r sent invalid task message: %r",
                                  client_id, msg, exc_info=True)
                          return
                      record = init_record(msg)
                      record['client_uuid'] = msg['header']['session']
                      record['queue'] = 'task'
                      header = msg['header']
                      msg_id = header['msg_id']
                      self.pending.add(msg_id)
                      self.unassigned.add(msg_id)
                      try:
                          # it's possible iopub arrived first:
                          existing = self.db.get_record(msg_id)
                          if existing['resubmitted']:
                              for key in ('submitted', 'client_uuid', 'buffers'):
                                  # don't clobber these keys on resubmit
                                  # submitted and client_uuid should be different
                                  # and buffers might be big, and shouldn't have changed
                                  record.pop(key)
                                  # still check content,header which should not change
                                  # but are not expensive to compare as buffers
                          # Keep values already stored when the new record has none; warn
                          # when both sides have different truthy values.
                          for key,evalue in iteritems(existing):
                              if key.endswith('buffers'):
                                  # don't compare buffers
                                  continue
                              rvalue = record.get(key, None)
                              if evalue and rvalue and evalue != rvalue:
                                  self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, rvalue, key, evalue)
                              elif evalue and not rvalue:
                                  record[key] = evalue
                          try:
                              self.db.update_record(msg_id, record)
                          except Exception:
                              self.log.error("DB Error updating record %r", msg_id, exc_info=True)
                      except KeyError:
                          # A KeyError raised anywhere in the block above (including
                          # get_record) lands here, and the record is added as new.
                          try:
                              self.db.add_record(msg_id, record)
                          except Exception:
                              self.log.error("DB Error adding record %r", msg_id, exc_info=True)
                      except Exception:
                          self.log.error("DB Error saving task request %r", msg_id, exc_info=True)
                  def save_task_result(self, idents, msg):
                      """Save the result of a completed task.
                      The msg_id is dropped from ``unassigned``.  If it was pending it is
                      moved to ``all_completed``, the per-engine ``completed`` and
                      ``tasks`` lists are updated when the engine is known, and the
                      result fields are written to the DB record.  Results for tasks
                      that are not pending are only logged at debug level.
                      """
                      client_id = idents[0]
                      try:
                          msg = self.session.unserialize(msg)
                      except Exception:
                          self.log.error("task::invalid task result message send to %r: %r",
                                  client_id, msg, exc_info=True)
                          return
                      parent = msg['parent_header']
                      if not parent:
                          self.log.warn("Task %r had no parent!", msg)
                          return
                      msg_id = parent['msg_id']
                      self.unassigned.discard(msg_id)
                      header = msg['header']
                      md = msg['metadata']
                      engine_uuid = md.get('engine', u'')
                      eid = self.by_ident.get(cast_bytes(engine_uuid), None)
                      status = md.get('status', None)
                      if msg_id not in self.pending:
                          self.log.debug("task::unknown task %r finished", msg_id)
                          return
                      self.log.info("task::task %r finished on %s", msg_id, eid)
                      self.pending.remove(msg_id)
                      self.all_completed.add(msg_id)
                      if eid is not None:
                          # aborted tasks are not counted as completed on the engine
                          if status != 'aborted':
                              self.completed[eid].append(msg_id)
                          if msg_id in self.tasks[eid]:
                              self.tasks[eid].remove(msg_id)
                      completed = header['date']
                      started = extract_dates(md.get('started', None))
                      result = {
                          'result_header' : header,
                          'result_metadata': md,
                          'result_content': msg['content'],
                          'started' : started,
                          'completed' : completed,
                          'received' : datetime.now(),
                          'engine_uuid': engine_uuid,
                      }
                      result['result_buffers'] = msg['buffers']
                      try:
                          self.db.update_record(msg_id, result)
                      except Exception:
                          # This is the result path; the message previously said "request".
                          self.log.error("DB Error saving task result %r", msg_id, exc_info=True)
                  def save_task_destination(self, idents, msg):
                      """Record which engine a task was assigned to.
                      Reads ``msg_id`` and ``engine_id`` from the message content, drops
                      the msg_id from ``unassigned``, appends it to the engine's entry in
                      ``tasks`` and stores the engine uuid on the DB record.  An
                      ``engine_id`` that is not in ``by_ident`` raises KeyError.
                      """
                      try:
                          msg = self.session.unserialize(msg, content=True)
                      except Exception:
                          self.log.error("task::invalid task tracking message", exc_info=True)
                          return
                      content = msg['content']
                      msg_id, engine_uuid = content['msg_id'], content['engine_id']
                      eid = self.by_ident[cast_bytes(engine_uuid)]
                      self.log.info("task::task %r arrived on %r", msg_id, eid)
                      self.unassigned.discard(msg_id)
                      self.tasks[eid].append(msg_id)
                      update = dict(engine_uuid=engine_uuid)
                      try:
                          self.db.update_record(msg_id, update)
                      except Exception:
                          self.log.error("DB Error saving task destination %r", msg_id, exc_info=True)
                  def mia_task_request(self, idents, msg):
                      """Unimplemented request handler; always raises NotImplementedError.
                      The unreachable assignment that followed the ``raise`` has been
                      removed; the reply sketch below is kept for reference only.
                      """
                      # client_id = idents[0]
                      # content = dict(mia=self.mia,status='ok')
                      # self.session.send('mia_reply', content=content, idents=client_id)
                      raise NotImplementedError
                  #--------------------- IOPub Traffic ------------------------------
                  def save_iopub_message(self, topics, msg):
                      """Save an IOPub message into the db.
                      The message is unserialized with the hub session and attributed to
                      the request named by its parent header. Messages that cannot be
                      unserialized, or that have no parent, are logged and dropped.
                      Handled message types:
                      * stream: data is appended to the stored stream of the same name
                      * error: content stored under 'error'
                      * execute_input: the code stored under 'execute_input'
                      * display_data / execute_result: content stored under the msg_type
                      * status / data_pub: nothing is stored
                      Any other type is logged as unhandled. If the parent msg_id has no
                      DB record yet, a new record is added; otherwise the existing one is
                      updated. DB failures are logged, not raised.
                      """
                      try:
                          msg = self.session.unserialize(msg, content=True)
                      except Exception:
                          self.log.error("iopub::invalid IOPub message", exc_info=True)
                          return
                      parent = msg['parent_header']
                      if not parent:
                          self.log.debug("iopub::IOPub message lacks parent: %r", msg)
                          return
                      msg_id = parent['msg_id']
                      msg_type = msg['header']['msg_type']
                      content = msg['content']
                      # ensure msg_id is in db
                      try:
                          rec = self.db.get_record(msg_id)
                      except KeyError:
                          rec = None
                      # collect the fields this message contributes to the record
                      d = {}
                      if msg_type == 'stream':
                          name = content['name']
                          # a stored stream may be None rather than '', so fall back to ''
                          s = '' if rec is None else (rec[name] or '')
                          d[name] = s + content['data']
                      elif msg_type == 'error':
                          d['error'] = content
                      elif msg_type == 'execute_input':
                          d['execute_input'] = content['code']
                      elif msg_type in ('display_data', 'execute_result'):
                          d[msg_type] = content
                      elif msg_type == 'status':
                          pass
                      elif msg_type == 'data_pub':
                          self.log.info("ignored data_pub message for %s", msg_id)
                      else:
                          # Logger.warn is a deprecated alias (removed in Python 3.13)
                          self.log.warning("unhandled iopub msg_type: %r", msg_type)
                      if not d:
                          return
                      if rec is None:
                          # new record
                          rec = empty_record()
                          rec['msg_id'] = msg_id
                          rec.update(d)
                          d = rec
                          update_record = self.db.add_record
                      else:
                          update_record = self.db.update_record
                      try:
                          update_record(msg_id, d)
                      except Exception:
                          self.log.error("DB Error saving iopub message %r", msg_id, exc_info=True)
                  #-------------------------------------------------------------------------
                  # Registration requests
                  #-------------------------------------------------------------------------
                  def connection_request(self, client_id, msg):
                      """Send a client the table of engines it may connect to.
                      The 'connection_reply' content maps each engine id (as a string) to
                      its uuid, leaving out engines whose uuid is in ``dead_engines``.
                      """
                      self.log.info("client::client %r connected", client_id)
                      live_engines = dict(
                          (str(eid), uuid)
                          for eid, uuid in iteritems(self.keytable)
                          if uuid not in self.dead_engines
                      )
                      reply = {'status': 'ok', 'engines': live_engines}
                      self.session.send(self.query, 'connection_reply', reply, parent=msg, ident=client_id)
                  def register_engine(self, reg, msg):
                      """Register a new engine.
                      Reads the engine's ``uuid`` from ``msg['content']`` and rejects the
                      request if that uuid is already present in ``by_ident`` or among the
                      pending ``incoming_registrations``. A 'registration_reply' is sent
                      to ``reg`` in both the accepted and the rejected case.
                      When accepted, the registration is finished immediately if the
                      engine's heart is already in ``heartmonitor.hearts``; otherwise it
                      is parked in ``incoming_registrations`` together with a timeout
                      that calls ``_purge_stalled_registration``.
                      Returns the engine id taken from ``self._next_id``, or None when
                      the request carries no ``uuid``.
                      """
                      content = msg['content']
                      try:
                          uuid = content['uuid']
                      except KeyError:
                          # NOTE(review): the message says "queue" but the missing key is 'uuid'
                          self.log.error("registration::queue not specified", exc_info=True)
                          return
                      eid = self._next_id
                      self.log.debug("registration::register_engine(%i, %r)", eid, uuid)
                      # optimistic reply; replaced by a wrapped exception if a check below fails
                      content = dict(id=eid,status='ok',hb_period=self.heartmonitor.period)
                      # check if requesting available IDs:
                      if cast_bytes(uuid) in self.by_ident:
                          # raise-and-catch so that error.wrap_exception() has an active exception
                          try:
                              raise KeyError("uuid %r in use" % uuid)
                          except:
                              content = error.wrap_exception()
                              self.log.error("uuid %r in use", uuid, exc_info=True)
                      else:
                          # also refuse a uuid that collides with a registration still in progress
                          for h, ec in iteritems(self.incoming_registrations):
                              if uuid == h:
                                  try:
                                      raise KeyError("heart_id %r in use" % uuid)
                                  except:
                                      self.log.error("heart_id %r in use", uuid, exc_info=True)
                                      content = error.wrap_exception()
                                  break
                              elif uuid == ec.uuid:
                                  try:
                                      raise KeyError("uuid %r in use" % uuid)
                                  except:
                                      self.log.error("uuid %r in use", uuid, exc_info=True)
                                      content = error.wrap_exception()
                                  break
                      msg = self.session.send(self.query, "registration_reply",
                              content=content,
                              ident=reg)
                      # the heart identity is the uuid as bytes
                      heart = cast_bytes(uuid)
                      if content['status'] == 'ok':
                          if heart in self.heartmonitor.hearts:
                              # already beating
                              self.incoming_registrations[heart] = EngineConnector(id=eid,uuid=uuid)
                              self.finish_registration(heart)
                          else:
                              # not beating yet: park the registration and purge it on timeout
                              purge = lambda : self._purge_stalled_registration(heart)
-                             dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
-                             dc.start()
-                             self.incoming_registrations[heart] = EngineConnector(id=eid,uuid=uuid,stallback=dc)
+                             t = self.loop.add_timeout(
+                                 self.loop.time() + self.registration_timeout,
+                                 purge,
+                             )
+                             self.incoming_registrations[heart] = EngineConnector(id=eid,uuid=uuid,stallback=t)
                      else:
                          self.log.error("registration::registration %i failed: %r", eid, content['evalue'])
                      return eid
                  def unregister_engine(self, ident, msg):
                      """Unregister an engine that explicitly requested to leave.
                      The engine id is read from ``msg['content']['id']``; if it cannot be
                      read, the failure is logged and nothing else happens. Otherwise the
                      engine's uuid is added to ``dead_engines``, a delayed call to
                      ``_handle_stranded_msgs`` is scheduled, the engine state is saved,
                      and an 'unregistration_notification' is sent when a notifier is set.
                      """
                      try:
                          eid = msg['content']['id']
                      except:
                          self.log.error("registration::bad engine id for unregistration: %r", ident, exc_info=True)
                          return
                      self.log.info("registration::unregister_engine(%r)", eid)
-                     # print (eid)
                      # raises KeyError if eid is not a key of keytable
                      uuid = self.keytable[eid]
                      content=dict(id=eid, uuid=uuid)
                      self.dead_engines.add(uuid)
-                     # self.ids.remove(eid)
-                     # uuid = self.keytable.pop(eid)
+                     #
-                     # ec = self.engines.pop(eid)
-                     # self.hearts.pop(ec.heartbeat)
-                     # self.by_ident.pop(ec.queue)
-                     # self.completed.pop(eid)
-                     handleit = lambda : self._handle_stranded_msgs(eid, uuid)
-                     dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
-                     dc.start()
+                     self.loop.add_timeout(
+                         self.loop.time() + self.registration_timeout,
+                         lambda : self._handle_stranded_msgs(eid, uuid),
+                     )
                      ############## TODO: HANDLE IT ################
                      self._save_engine_state()
                      # tell listeners that the engine is gone
                      if self.notifier:
                          self.session.send(self.notifier, "unregistration_notification", content=content)
                  def _handle_stranded_msgs(self, eid, uuid):
                      """Fail every message still queued on an engine that has unregistered.
                      Each msg_id in the engine's queue is moved from ``pending`` to
                      ``all_completed`` and its DB record receives an EngineError result.
                      This may fire prematurely: an engine can go down right after
                      completing a result, so a client may be told the task failed and
                      then receive the real result later.
                      """
                      for stranded_id in self.queues[eid]:
                          self.pending.remove(stranded_id)
                          self.all_completed.add(stranded_id)
                          # raise-and-catch so wrap_exception() sees an active exception
                          try:
                              raise error.EngineError("Engine %r died while running task %r" % (eid, stranded_id))
                          except:
                              failure = error.wrap_exception()
                          # build a fake header; header date and completion time are the same instant
                          died_at = datetime.now()
                          fake_header = {'engine': uuid, 'date': died_at}
                          record = {
                              'result_content': failure,
                              'result_header': fake_header,
                              'result_buffers': [],
                              'completed': died_at,
                              'engine_uuid': uuid,
                          }
                          try:
                              self.db.update_record(stranded_id, record)
                          except Exception:
                              self.log.error("DB Error handling stranded msg %r", stranded_id, exc_info=True)
                  def finish_registration(self, heart):
                      """Second half of engine registration, called after our HeartMonitor
                      has received a beat from the Engine's Heart.
                      Pops the pending EngineConnector for ``heart`` out of
                      ``incoming_registrations`` (logging an error and returning if there
                      is none), cancels its stall timeout if it has one, records the
                      engine in the hub's lookup tables with empty queue/task/completed
                      lists, sends a 'registration_notification' when a notifier is set,
                      and saves the engine state.
                      """
                      try:
                          ec = self.incoming_registrations.pop(heart)
                      except KeyError:
                          self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
                          return
                      self.log.info("registration::finished registering engine %i:%s", ec.id, ec.uuid)
                      # the registration made it in time, so the purge timeout is no longer needed
                      if ec.stallback is not None:
-                         ec.stallback.stop()
+                         self.loop.remove_timeout(ec.stallback)
                      eid = ec.id
                      # index the engine by id, uuid (as bytes) and heart
                      self.ids.add(eid)
                      self.keytable[eid] = ec.uuid
                      self.engines[eid] = ec
                      self.by_ident[cast_bytes(ec.uuid)] = ec.id
                      self.queues[eid] = list()
                      self.tasks[eid] = list()
                      self.completed[eid] = list()
                      self.hearts[heart] = eid
                      content = dict(id=eid, uuid=self.engines[eid].uuid)
                      if self.notifier:
                          self.session.send(self.notifier, "registration_notification", content=content)
                      self.log.info("engine::Engine Connected: %i", eid)
                      self._save_engine_state()
                  def _purge_stalled_registration(self, heart):
                      """Discard the pending registration stored for ``heart``, if any.
                      Does nothing when no registration is pending for ``heart``.
                      """
                      try:
                          stalled = self.incoming_registrations.pop(heart)
                      except KeyError:
                          # nothing pending for this heart
                          return
                      self.log.info("registration::purging stalled registration: %i", stalled.id)
                  #-------------------------------------------------------------------------
                  # Engine State
                  #-------------------------------------------------------------------------
                  def _cleanup_engine_state_file(self):
                      """Remove the engine state file if it exists.
                      A failure to remove the file is logged rather than raised.
                      """
                      if os.path.exists(self.engine_state_file):
                          self.log.debug("cleaning up engine state: %s", self.engine_state_file)
                          try:
                              os.remove(self.engine_state_file)
                          except (IOError, OSError):
                              # os.remove raises OSError, which on Python 2 is not an
                              # IOError; catch both so the failure is always logged
                              self.log.error("Couldn't cleanup file: %s", self.engine_state_file, exc_info=True)
                  def _save_engine_state(self):
                      """Write the id -> uuid mapping of live engines to the JSON state file.
                      Does nothing when no engine state file is configured. Engines whose
                      uuid is in ``dead_engines`` are left out. The id counter is stored
                      alongside the mapping under 'next_id'.
                      """
                      path = self.engine_state_file
                      if not path:
                          return
                      self.log.debug("save engine state to %s" % path)
                      live = dict(
                          (eid, ec.uuid)
                          for eid, ec in iteritems(self.engines)
                          if ec.uuid not in self.dead_engines
                      )
                      state = {'engines': live, 'next_id': self._idcounter}
                      with open(path, 'w') as f:
                          json.dump(state, f)
                  def _load_engine_state(self):
                      """Load the engine mapping from the JSON state file and re-register it.
                      Does nothing when the state file does not exist. Every saved engine
                      is marked as a beating heart and pushed through
                      ``finish_registration``. The notifier is disabled while this
                      happens and is restored afterwards, even if re-registration raises.
                      Finally the id counter is restored from 'next_id'.
                      """
                      if not os.path.exists(self.engine_state_file):
                          return
                      self.log.info("loading engine state from %s" % self.engine_state_file)
                      with open(self.engine_state_file) as f:
                          state = json.load(f)
                      # silence notifications while replaying saved registrations
                      save_notifier = self.notifier
                      self.notifier = None
                      try:
                          for eid, uuid in iteritems(state['engines']):
                              heart = uuid.encode('ascii')
                              # start with this heart as current and beating:
                              self.heartmonitor.responses.add(heart)
                              self.heartmonitor.hearts.add(heart)
                              # JSON object keys are strings, so the id is converted back to int
                              self.incoming_registrations[heart] = EngineConnector(id=int(eid), uuid=uuid)
                              self.finish_registration(heart)
                      finally:
                          # never leave the hub without its notifier if replay fails
                          self.notifier = save_notifier
                      self._idcounter = state['next_id']
                  #-------------------------------------------------------------------------
                  # Client Requests
                  #-------------------------------------------------------------------------
                  def shutdown_request(self, client_id, msg):
                      """Handle a shutdown request.
                      Sends a 'shutdown_reply' to the requesting client, sends a
                      'shutdown_notice' on the notifier, then schedules ``_shutdown`` on
                      the event loop after a short delay.
                      """
                      self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
                      # also notify other clients of shutdown
                      self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
-                     dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
-                     dc.start()
+                     self.loop.add_timeout(self.loop.time() + 1, self._shutdown)
                  def _shutdown(self):
                      """Log the shutdown, pause briefly, then exit the process with status 0."""
                      self.log.info("hub::hub shutting down.")
                      grace_period = 0.1
                      time.sleep(grace_period)
                      sys.exit(0)
                  def check_load(self, client_id, msg):
                      """Reply with the number of outstanding jobs on each requested target.
                      The load of a target is the length of its queue plus the length of
                      its task list. The 'load_reply' content maps ``str(target)`` to that
                      number, using the same key format as ``queue_status``. If the
                      targets are missing or fail validation, a 'hub_error' reply with
                      the wrapped exception is sent instead.
                      """
                      content = msg['content']
                      try:
                          targets = content['targets']
                          targets = self._validate_targets(targets)
                      except Exception:
                          content = error.wrap_exception()
                          self.session.send(self.query, "hub_error",
                                  content=content, ident=client_id)
                          return
                      content = dict(status='ok')
                      for t in targets:
                          # str(), not bytes(): on Python 3 bytes(int) is a run of NUL
                          # bytes of that length, not the decimal representation
                          content[str(t)] = len(self.queues[t])+len(self.tasks[t])
                      self.session.send(self.query, "load_reply", content=content, ident=client_id)
                  def queue_status(self, client_id, msg):
                      """Report the queue status of one or more targets.
                      With ``verbose`` set in the request the reply carries the msg_ids
                      themselves; otherwise it carries only how many there are.
                      Each target's entry has the keys:
                      * queue (pending MUX jobs)
                      * tasks (pending Task jobs)
                      * completed (finished jobs from both queues)
                      The reply also has an 'unassigned' entry, reported the same way.
                      """
                      request = msg['content']
                      targets = request['targets']
                      try:
                          targets = self._validate_targets(targets)
                      except:
                          failure = error.wrap_exception()
                          self.session.send(self.query, "hub_error",
                                  content=failure, ident=client_id)
                          return
                      verbose = request.get('verbose', False)
                      # terse mode reports sizes, verbose mode reports the lists as they are
                      if verbose:
                          summarize = lambda ids: ids
                      else:
                          summarize = len
                      reply = dict(status='ok')
                      for t in targets:
                          reply[str(t)] = {
                              'queue': summarize(self.queues[t]),
                              'completed': summarize(self.completed[t]),
                              'tasks': summarize(self.tasks[t]),
                          }
                      if verbose:
                          reply['unassigned'] = list(self.unassigned)
                      else:
                          reply['unassigned'] = len(self.unassigned)
                      self.session.send(self.query, "queue_reply", content=reply, ident=client_id)
                  def purge_results(self, client_id, msg):
                      """Drop completed records from the task database.
                      The request may name ``msg_ids`` (a list, or the string 'all') and
                      ``engine_ids``. 'all' drops every completed record. A list is
                      refused if any of its ids is still pending; otherwise those
                      records are dropped and then, for each listed engine, that
                      engine's completed records. The first failure stops the work and
                      is returned as the wrapped exception in the 'purge_reply'.
                      """
                      def failed():
                          # wrap and log the exception currently being handled
                          wrapped = error.wrap_exception()
                          self.log.exception("Error dropping records")
                          return wrapped
                      request = msg['content']
                      self.log.info("Dropping records with %s", request)
                      msg_ids = request.get('msg_ids', [])
                      reply = {'status': 'ok'}
                      if msg_ids == 'all':
                          try:
                              self.db.drop_matching_records({'completed': {'$ne': None}})
                          except Exception:
                              reply = failed()
                      else:
                          still_pending = [m for m in msg_ids if m in self.pending]
                          if still_pending:
                              # raise-and-catch so wrap_exception() sees an active exception
                              try:
                                  raise IndexError("msg pending: %r" % still_pending[0])
                              except:
                                  reply = failed()
                          else:
                              try:
                                  self.db.drop_matching_records({'msg_id': {'$in': msg_ids}})
                              except Exception:
                                  reply = failed()
                          if reply['status'] == 'ok':
                              for eid in request.get('engine_ids', []):
                                  if eid not in self.engines:
                                      try:
                                          raise IndexError("No such engine: %i" % eid)
                                      except:
                                          reply = failed()
                                      break
                                  engine_uuid = self.engines[eid].uuid
                                  try:
                                      self.db.drop_matching_records(
                                          {'engine_uuid': engine_uuid, 'completed': {'$ne': None}})
                                  except Exception:
                                      reply = failed()
                                      break
                      self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
                  def resubmit_task(self, client_id, msg):
                      """Resubmit one or more tasks.
                      Looks up the records for ``msg['content']['msg_ids']`` and re-sends
                      each one on ``self.resubmit`` under a fresh msg_id, adding a new DB
                      record for the resubmission. The 'resubmit_reply' carries a
                      ``resubmitted`` mapping of original msg_id -> new msg_id, and the
                      original records are then updated with that new id.
                      If the lookup fails, or the number of records found does not match
                      the number of ids requested, the reply is the wrapped exception and
                      nothing is resubmitted.
                      """
                      def finish(reply):
                          # every exit path answers the client through this helper
                          self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)
                      content = msg['content']
                      msg_ids = content['msg_ids']
                      # NOTE(review): this `reply` is never read; finish() is always given another value
                      reply = dict(status='ok')
                      try:
                          records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
                              'header', 'content', 'buffers'])
                      except Exception:
                          self.log.error('db::db error finding tasks to resubmit', exc_info=True)
                          return finish(error.wrap_exception())
                      # validate msg_ids
                      found_ids = [ rec['msg_id'] for rec in records ]
                      pending_ids = [ msg_id for msg_id in found_ids if msg_id in self.pending ]
                      if len(records) > len(msg_ids):
                          # raise-and-catch so that error.wrap_exception() has an active exception
                          try:
                              raise RuntimeError("DB appears to be in an inconsistent state."
                                  "More matching records were found than should exist")
                          except Exception:
                              self.log.exception("Failed to resubmit task")
                              return finish(error.wrap_exception())
                      elif len(records) < len(msg_ids):
                          missing = [ m for m in msg_ids if m not in found_ids ]
                          try:
                              raise KeyError("No such msg(s): %r" % missing)
                          except KeyError:
                              self.log.exception("Failed to resubmit task")
                              return finish(error.wrap_exception())
                      elif pending_ids:
                          pass
                          # no need to raise on resubmit of pending task, now that we
                          # resubmit under new ID, but do we want to raise anyway?
                          # msg_id = invalid_ids[0]
                          # try:
                          #     raise ValueError("Task(s) %r appears to be inflight" % )
                          # except Exception:
                          #     return finish(error.wrap_exception())
                      # mapping of original IDs to resubmitted IDs
                      resubmitted = {}
                      # send the messages
                      for rec in records:
                          header = rec['header']
                          # a new message supplies the fresh msg_id and timestamp
                          msg = self.session.msg(header['msg_type'], parent=header)
                          msg_id = msg['msg_id']
                          msg['content'] = rec['content']
                          # use the old header, but update msg_id and timestamp
                          fresh = msg['header']
                          header['msg_id'] = fresh['msg_id']
                          header['date'] = fresh['date']
                          msg['header'] = header
                          self.session.send(self.resubmit, msg, buffers=rec['buffers'])
                          resubmitted[rec['msg_id']] = msg_id
                          self.pending.add(msg_id)
                          msg['buffers'] = rec['buffers']
                          try:
                              self.db.add_record(msg_id, init_record(msg))
                          except Exception:
                              self.log.error("db::DB Error updating record: %s", msg_id, exc_info=True)
                              return finish(error.wrap_exception())
                      # the client is answered before the original records are annotated
                      finish(dict(status='ok', resubmitted=resubmitted))
                      # store the new IDs in the Task DB
                      for msg_id, resubmit_id in iteritems(resubmitted):
                          try:
                              self.db.update_record(msg_id, {'resubmitted' : resubmit_id})
                          except Exception:
                              self.log.error("db::DB Error updating record: %s", msg_id, exc_info=True)
                  def _extract_record(self, rec):
                      """Split a TaskRecord dict into the reply content and buffers for get_result.
                      Returns ``(content, buffers)``: ``content`` holds the request and
                      result headers/metadata, the result content, the received time, and
                      an 'io' dict with the captured input and output fields; ``buffers``
                      is the list of result buffers converted to bytes, or an empty list
                      when the record has none.
                      """
                      io_keys = ('execute_input', 'execute_result', 'error', 'stdout', 'stderr')
                      io_dict = dict((key, rec[key]) for key in io_keys)
                      copied_keys = (
                          'header',
                          'metadata',
                          'result_metadata',
                          'result_header',
                          'result_content',
                          'received',
                      )
                      content = dict((key, rec[key]) for key in copied_keys)
                      content['io'] = io_dict
                      raw_buffers = rec['result_buffers']
                      buffers = [bytes(b) for b in raw_buffers] if raw_buffers else []
                      return content, buffers
                  def get_results(self, client_id, msg):
                      """Get the result of 1 or more messages.
                      The request's ``msg_ids`` are de-duplicated and sorted. Each one is
                      reported in the reply's 'pending' or 'completed' list; unless
                      ``status_only`` is set, the extracted record of every completed
                      message is included under its msg_id and its buffers are appended
                      to the reply buffers. An unknown msg_id replaces the reply content
                      with a wrapped KeyError and stops the scan. A DB lookup failure is
                      logged and answered with the wrapped exception.
                      """
                      content = msg['content']
                      msg_ids = sorted(set(content['msg_ids']))
                      statusonly = content.get('status_only', False)
                      pending = []
                      completed = []
                      content = dict(status='ok')
                      content['pending'] = pending
                      content['completed'] = completed
                      buffers = []
                      if not statusonly:
                          try:
                              matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
                              # turn match list into dict, for faster lookup
                              records = {}
                              for rec in matches:
                                  records[rec['msg_id']] = rec
                          except Exception:
                              content = error.wrap_exception()
                              self.log.exception("Failed to get results")
                              self.session.send(self.query, "result_reply", content=content,
                                                                  parent=msg, ident=client_id)
                              return
                      else:
                          records = {}
                      for msg_id in msg_ids:
                          if msg_id in self.pending:
                              pending.append(msg_id)
                          elif msg_id in self.all_completed:
                              completed.append(msg_id)
                              if not statusonly:
                                  c,bufs = self._extract_record(records[msg_id])
                                  content[msg_id] = c
                                  buffers.extend(bufs)
                          elif msg_id in records:
                              # look at this message's own record; the loop variable left
                              # over from building `records` is only the last DB match
                              if records[msg_id]['completed']:
                                  completed.append(msg_id)
                                  c,bufs = self._extract_record(records[msg_id])
                                  content[msg_id] = c
                                  buffers.extend(bufs)
                              else:
                                  pending.append(msg_id)
                          else:
                              # raise-and-catch so wrap_exception() sees an active exception
                              try:
                                  raise KeyError('No such message: '+msg_id)
                              except:
                                  content = error.wrap_exception()
                              break
                      self.session.send(self.query, "result_reply", content=content,
                                                          parent=msg, ident=client_id,
                                                          buffers=buffers)
                  def get_history(self, client_id, msg):
                      """Send the client the list of all msg_ids held in the DB records.
                      If the DB call fails, the failure is logged and the 'history_reply'
                      carries the wrapped exception instead.
                      """
                      try:
                          history = self.db.get_history()
                      except Exception:
                          reply = error.wrap_exception()
                          self.log.exception("Failed to get history")
                      else:
                          reply = {'status': 'ok', 'history': history}
                      self.session.send(self.query, "history_reply", content=reply,
                                                          parent=msg, ident=client_id)
                  def db_query(self, client_id, msg):
                      """Run a raw query against the task record database.
                      The request may carry a ``query`` dict and a list of ``keys`` to
                      fetch. Buffers are removed from every returned record; when
                      'buffers' and/or 'result_buffers' were explicitly requested in
                      ``keys``, they are sent as message buffers and the number
                      belonging to each record is reported in ``buffer_lens`` /
                      ``result_buffer_lens`` (None when not requested). A failing query
                      is logged and answered with the wrapped exception.
                      """
                      request = msg['content']
                      query = extract_dates(request.get('query', {}))
                      keys = request.get('keys', None)
                      buffers = []
                      try:
                          records = self.db.find_records(query, keys)
                      except Exception:
                          reply = error.wrap_exception()
                          self.log.exception("DB query failed")
                      else:
                          # buffer counts are only tracked for explicitly requested keys
                          if keys is None:
                              buffer_lens = None
                              result_buffer_lens = None
                          else:
                              buffer_lens = [] if 'buffers' in keys else None
                              result_buffer_lens = [] if 'result_buffers' in keys else None
                          tracked = (
                              ('buffers', buffer_lens),
                              ('result_buffers', result_buffer_lens),
                          )
                          for rec in records:
                              for field, lens in tracked:
                                  # buffers may be None, so double check
                                  found = rec.pop(field, None) or []
                                  if lens is not None:
                                      lens.append(len(found))
                                      buffers.extend(found)
                          reply = dict(status='ok', records=records, buffer_lens=buffer_lens,
                                                  result_buffer_lens=result_buffer_lens)
                      self.session.send(self.query, "db_reply", content=reply,
                                                          parent=msg, ident=client_id,
                                                          buffers=buffers)

IPython/parallel/controller/scheduler.py

0 +3 -2

              """The Python scheduler for rich scheduling.
              The Pure ZMQ scheduler does not allow routing schemes other than LRU,
              nor does it check msg_id DAG dependencies. For those, a slightly slower
              Python Scheduler exists.
              """
              # Copyright (c) IPython Development Team.
              # Distributed under the terms of the Modified BSD License.
              import logging
              import sys
              import time
              from collections import deque
              from datetime import datetime
              from random import randint, random
              from types import FunctionType
              try:
                  import numpy
              except ImportError:
                  numpy = None
              import zmq
              from zmq.eventloop import ioloop, zmqstream
              # local imports
              from IPython.external.decorator import decorator
              from IPython.config.application import Application
              from IPython.config.loader import Config
              from IPython.utils.traitlets import Instance, Dict, List, Set, Integer, Enum, CBytes
              from IPython.utils.py3compat import cast_bytes
              from IPython.parallel import error, util
              from IPython.parallel.factory import SessionFactory
              from IPython.parallel.util import connect_logger, local_logger
              from .dependency import Dependency
              @decorator
              def logged(f, self, *args, **kwargs):
                  """Log a debug record naming the wrapped method, then call it.
                  The positional and keyword arguments of the call are included in the
                  record; the wrapped method's return value is passed through unchanged.
                  """
                  method_name = f.__name__
                  self.log.debug("scheduler::%s(*%s,**%s)", method_name, args, kwargs)
                  result = f(self, *args, **kwargs)
                  return result
              #----------------------------------------------------------------------
              # Chooser functions
              #----------------------------------------------------------------------
              def plainrandom(loads):
                  """Choose an index uniformly at random; the load values are not inspected."""
                  last = len(loads) - 1
                  return randint(0, last)
              def lru(loads):
                  """Pick index 0, the head of the line.
                  `loads` is never inspected; the result is the least-recently-used
                  entry only if the caller keeps `loads` ordered oldest-first.
                  """
                  head = 0
                  return head
              def twobin(loads):
                  """Draw two random indices and keep the smaller one.
                  `loads` is only used for its length. With LRU-ordered loads
                  (oldest first) the smaller index is the less recently used of the two.
                  """
                  last = len(loads) - 1
                  first = randint(0, last)
                  second = randint(0, last)
                  return second if second < first else first
              def weighted(loads):
                  """Draw two indices with probability proportional to inverse load.
                  Of the two draws, the index with the larger weight (i.e. the smaller
                  load) is returned; on a tie the first draw wins.
                  """
                  # weight 0 a million times more than 1:
                  weights = 1. / (1e-6 + numpy.array(loads))
                  sums = weights.cumsum()
                  total = sums[-1]
                  def locate(threshold):
                      # first position whose cumulative weight reaches `threshold`
                      pos = 0
                      while sums[pos] < threshold:
                          pos += 1
                      return pos
                  x = random() * total
                  y = random() * total
                  idx = locate(x)
                  idy = locate(y)
                  return idy if weights[idy] > weights[idx] else idx
              def leastload(loads):
                  """Return the index of the smallest load.
                  Ties go to the earliest position, so with LRU-ordered loads the
                  least recently used of the equally loaded entries is chosen.
                  """
                  lowest = min(loads)
                  return loads.index(lowest)
              #---------------------------------------------------------------------
              # Classes
              #---------------------------------------------------------------------
              # Shared empty Dependency; dispatch_submission uses it as a job's `after`
              # when no time dependency was given or the given one is already met:
              MET = Dependency([])
              class Job(object):
                  """Record of one submitted task plus its scheduling state.
                  Besides the constructor arguments, each Job carries a creation
                  timestamp (used for ordering), a `removed` flag for lazy deletion from
                  the queue, a `timeout_id` counter and a `blacklist` set.
                  """
                  def __init__(self, msg_id, raw_msg, idents, msg, header, metadata,
                                  targets, after, follow, timeout):
                      # the message itself
                      self.msg_id, self.raw_msg, self.idents = msg_id, raw_msg, idents
                      self.msg, self.header, self.metadata = msg, header, metadata
                      # scheduling constraints
                      self.targets, self.after, self.follow = targets, after, follow
                      self.timeout = timeout
                      # bookkeeping
                      self.timestamp = time.time()
                      self.removed = False # used for lazy-delete from sorted queue
                      self.timeout_id = 0
                      self.blacklist = set()
                  def __lt__(self, other):
                      """Order jobs by creation timestamp."""
                      mine, theirs = self.timestamp, other.timestamp
                      return mine < theirs
                  def __cmp__(self, other):
                      """Python 2 comparison hook: compare creation timestamps."""
                      return cmp(self.timestamp, other.timestamp)
                  @property
                  def dependents(self):
                      """Union of the `follow` and `after` dependencies."""
                      combined = self.follow.union(self.after)
                      return combined
              class TaskScheduler(SessionFactory):
                  """Python TaskScheduler object.
                  This is the simplest object that supports msg_id based
                  DAG dependencies. *Only* task msg_ids are checked, not
                  msg_ids of jobs submitted via the MUX queue.
                  """
                  hwm = Integer(1, config=True,
                      help="""specify the High Water Mark (HWM) for the downstream
                      socket in the Task scheduler. This is the maximum number
                      of allowed outstanding tasks on each engine.
                      The default (1) means that only one task can be outstanding on each
                      engine.  Setting TaskScheduler.hwm=0 means there is no limit, and the
                      engines continue to be assigned tasks while they are working,
                      effectively hiding network latency behind computation, but can result
                      in an imbalance of work when submitting many heterogenous tasks all at
                      once.  Any positive value greater than one is a compromise between the
                      two.
                      """
                  )
                  scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
                      'leastload', config=True, allow_none=False,
                      help="""select the task scheduler scheme  [default: Python LRU]
                      Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
                  )
                  def _scheme_name_changed(self, old, new):
                      self.log.debug("Using scheme %r"%new)
                      self.scheme = globals()[new]
                  # input arguments:
                  scheme = Instance(FunctionType) # function for determining the destination
                  def _scheme_default(self):
                      """Default chooser function for `scheme`: leastload."""
                      default_chooser = leastload
                      return default_chooser
                  client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
                  engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
                  notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
                  mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
                  query_stream = Instance(zmqstream.ZMQStream) # hub-facing DEALER stream
                  # internals:
                  queue = Instance(deque) # sorted list of Jobs
                  def _queue_default(self):
                      return deque()
                  queue_map = Dict() # dict by msg_id of Jobs (for O(1) access to the Queue)
                  graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
                  retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
                  # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
                  pending = Dict() # dict by engine_uuid of submitted tasks
                  completed = Dict() # dict by engine_uuid of completed tasks
                  failed = Dict() # dict by engine_uuid of failed tasks
                  destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
                  clients = Dict() # dict by msg_id for who submitted the task
                  targets = List() # list of target IDENTs
                  loads = List() # list of engine loads
                  # full = Set() # set of IDENTs that have HWM outstanding tasks
                  all_completed = Set() # set of all completed tasks
                  all_failed = Set() # set of all failed tasks
                  all_done = Set() # set of all finished tasks=union(completed,failed)
                  all_ids = Set() # set of all submitted task IDs
                  ident = CBytes() # ZMQ identity. This should just be self.session.session
                                   # but ensure Bytes
                  def _ident_default(self):
                      return self.session.bsession
                  def start(self):
                      """Attach the stream callbacks and send the initial connection request."""
                      # the reply to the connection request arrives on query_stream
                      self.query_stream.on_recv(self.dispatch_query_reply)
                      self.session.send(self.query_stream, "connection_request", {})
                      # results from engines, submissions from clients
                      self.engine_stream.on_recv(self.dispatch_result, copy=False)
                      self.client_stream.on_recv(self.dispatch_submission, copy=False)
                      # engine (un)registration handlers, keyed by msg_type
                      self._notification_handlers = {
                          'registration_notification': self._register_engine,
                          'unregistration_notification': self._unregister_engine,
                      }
                      self.notifier_stream.on_recv(self.dispatch_notification)
                      self.log.info("Scheduler started [%s]" % self.scheme_name)
                  def resume_receiving(self):
                      """Start accepting job submissions from clients again."""
                      stream = self.client_stream
                      stream.on_recv(self.dispatch_submission, copy=False)
                  def stop_receiving(self):
                      """Detach the submission callback while there are no engines.
                      Unhandled submissions are left in the ZMQ queue.
                      """
                      stream = self.client_stream
                      stream.on_recv(None)
                  #-----------------------------------------------------------------------
                  # [Un]Registration Handling
                  #-----------------------------------------------------------------------
                  def dispatch_query_reply(self, msg):
                      """Register every engine listed in the reply to our connection request.
                      A message whose identities cannot be split off, or that fails to
                      unserialize, is logged as a warning and dropped.
                      """
                      session = self.session
                      try:
                          idents, frames = session.feed_identities(msg)
                      except ValueError:
                          self.log.warn("task::Invalid Message: %r",msg)
                          return
                      try:
                          reply = session.unserialize(frames)
                      except ValueError:
                          self.log.warn("task::Unauthorized message from: %r"%idents)
                          return
                      engines = reply['content'].get('engines', {})
                      for uuid in engines.values():
                          self._register_engine(cast_bytes(uuid))
                  @util.log_errors
                  def dispatch_notification(self, msg):
                      """Route an engine register/unregister notice to its handler.
                      Malformed or unserializable messages are logged as warnings, unknown
                      message types and handler failures as errors; none of them propagate.
                      """
                      try:
                          idents, frames = self.session.feed_identities(msg)
                      except ValueError:
                          self.log.warn("task::Invalid Message: %r",msg)
                          return
                      try:
                          notice = self.session.unserialize(frames)
                      except ValueError:
                          self.log.warn("task::Unauthorized message from: %r"%idents)
                          return
                      msg_type = notice['header']['msg_type']
                      handler = self._notification_handlers.get(msg_type, None)
                      if handler is None:
                          self.log.error("Unhandled message type: %r"%msg_type)
                          return
                      try:
                          handler(cast_bytes(notice['content']['uuid']))
                      except Exception:
                          self.log.error("task::Invalid notification msg: %r", notice, exc_info=True)
                  def _register_engine(self, uid):
                      """New engine with ident `uid` became available."""
                      # head of the line:
                      self.targets.insert(0,uid)
                      self.loads.insert(0,0)
                      # initialize sets
                      self.completed[uid] = set()
                      self.failed[uid] = set()
                      self.pending[uid] = {}
                      # rescan the graph:
                      self.update_graph(None)
                  def _unregister_engine(self, uid):
                      """Existing engine with ident `uid` became unavailable.
                      The engine is removed from `targets`/`loads` so it gets no new work.
                      If it still has pending tasks, handle_stranded_tasks is scheduled on
                      the loop; otherwise its completed/failed records are dropped at once.
                      Raises ValueError (from list.index) if `uid` is not in `targets`.
                      """
                      if len(self.targets) == 1:
                          # this was our only engine
                          # (no special handling is performed for that case)
                          pass
                      # handle any potentially finished tasks:
                      self.engine_stream.flush()
                      # don't pop destinations, because they might be used later
                      # map(self.destinations.pop, self.completed.pop(uid))
                      # map(self.destinations.pop, self.failed.pop(uid))
                      # prevent this engine from receiving work
                      idx = self.targets.index(uid)
                      self.targets.pop(idx)
                      self.loads.pop(idx)
                      # wait 5 seconds before cleaning up pending jobs, since the results might
                      # still be incoming
                      if self.pending[uid]:
-                         dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
-                         dc.start()
+                         self.loop.add_timeout(self.loop.time() + 5,
+                             lambda : self.handle_stranded_tasks(uid),
+                         )
                      else:
                          # nothing outstanding: forget this engine's finished-task records now
                          self.completed.pop(uid)
                          self.failed.pop(uid)
                  def handle_stranded_tasks(self, engine):
                      """Fail every task still pending on an engine that died.
                      For each task still recorded in ``self.pending[engine]`` a fake
                      'apply_reply' carrying an EngineError is built and fed through
                      dispatch_result, as if the engine itself had reported the failure.
                      Afterwards the engine's completed/failed records are dropped.
                      Parameters
                      ----------
                      engine : bytes
                          ident of the dead engine
                      """
                      lost = self.pending[engine]
                      # Iterate over a snapshot of the keys: dispatching the fake reply pops
                      # the task from the pending dict, and changing a dict's size while
                      # iterating its live key view raises RuntimeError on Python 3.
                      for msg_id in list(lost):
                          if msg_id not in self.pending[engine]:
                              # prevent double-handling of messages
                              continue
                          raw_msg = lost[msg_id].raw_msg
                          idents,msg = self.session.feed_identities(raw_msg, copy=False)
                          parent = self.session.unpack(msg[1].bytes)
                          # the reply must look like it came from the engine, for the client
                          idents = [engine, idents[0]]
                          # build fake error reply
                          try:
                              raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
                          except Exception:
                              # wrap_exception is called inside the handler, while the
                              # exception is the one currently being handled
                              content = error.wrap_exception()
                          # build fake metadata
                          md = dict(
                              status=u'error',
                              engine=engine.decode('ascii'),
                              date=datetime.now(),
                          )
                          msg = self.session.msg('apply_reply', content, parent=parent, metadata=md)
                          raw_reply = list(map(zmq.Message, self.session.serialize(msg, ident=idents)))
                          # and dispatch it
                          self.dispatch_result(raw_reply)
                      # finally scrub completed/failed lists
                      self.completed.pop(engine)
                      self.failed.pop(engine)
                  #-----------------------------------------------------------------------
                  # Job Submission
                  #-----------------------------------------------------------------------
                  @util.log_errors
                  def dispatch_submission(self, raw_msg):
                      """Dispatch job submission to appropriate handlers.
                      The message is unpacked, forwarded to the monitor stream and wrapped
                      in a Job together with the targets, retry count, dependencies and
                      timeout read from its metadata. The job is failed at once when a
                      dependency is invalid or unreachable, run immediately when its time
                      dependencies are met and an engine can take it, and saved for later
                      otherwise.
                      Parameters
                      ----------
                      raw_msg : list
                          the message frames of the submission
                      """
                      # ensure targets up to date:
                      self.notifier_stream.flush()
                      try:
                          idents, msg = self.session.feed_identities(raw_msg, copy=False)
                          msg = self.session.unserialize(msg, content=False, copy=False)
                      except Exception:
                          self.log.error("task::Invaid task msg: %r"%raw_msg, exc_info=True)
                          return
                      # send to monitor
                      self.mon_stream.send_multipart([b'intask']+raw_msg, copy=False)
                      header = msg['header']
                      md = msg['metadata']
                      msg_id = header['msg_id']
                      self.all_ids.add(msg_id)
                      # get targets as a set of bytes objects
                      # from a list of unicode objects
                      targets = md.get('targets', [])
                      targets = set(map(cast_bytes, targets))
                      # number of times the task may be resubmitted after failing
                      retries = md.get('retries', 0)
                      self.retries[msg_id] = retries
                      # time dependencies
                      after = md.get('after', None)
                      if after:
                          after = Dependency(after)
                          if after.all:
                              # for all-type dependencies, drop the ids that already
                              # finished in an accepted way
                              if after.success:
                                  after = Dependency(after.difference(self.all_completed),
                                              success=after.success,
                                              failure=after.failure,
                                              all=after.all,
                                  )
                              if after.failure:
                                  after = Dependency(after.difference(self.all_failed),
                                              success=after.success,
                                              failure=after.failure,
                                              all=after.all,
                                  )
                          if after.check(self.all_completed, self.all_failed):
                              # recast as empty set, if `after` already met,
                              # to prevent unnecessary set comparisons
                              after = MET
                      else:
                          after = MET
                      # location dependencies
                      follow = Dependency(md.get('follow', []))
                      # a falsy timeout (None, 0) is kept as is and means no timeout
                      timeout = md.get('timeout', None)
                      if timeout:
                          timeout = float(timeout)
                      job = Job(msg_id=msg_id, raw_msg=raw_msg, idents=idents, msg=msg,
                               header=header, targets=targets, after=after, follow=follow,
                               timeout=timeout, metadata=md,
                      )
                      # validate and reduce dependencies:
                      for dep in after,follow:
                          if not dep: # empty dependency
                              continue
                          # check valid:
                          # (a task may not depend on itself or on ids never submitted)
                          if msg_id in dep or dep.difference(self.all_ids):
                              # fail_unreachable only acts on jobs present in queue_map
                              self.queue_map[msg_id] = job
                              return self.fail_unreachable(msg_id, error.InvalidDependency)
                          # check if unreachable:
                          if dep.unreachable(self.all_completed, self.all_failed):
                              self.queue_map[msg_id] = job
                              return self.fail_unreachable(msg_id)
                      if after.check(self.all_completed, self.all_failed):
                          # time deps already met, try to run
                          if not self.maybe_run(job):
                              # can't run yet
                              if msg_id not in self.all_failed:
                                  # could have failed as unreachable
                                  self.save_unmet(job)
                      else:
                          self.save_unmet(job)
                  def job_timeout(self, job, timeout_id):
                      """Timeout callback scheduled for a queued job.
                      Only the most recently scheduled timeout of a job is honoured. If the
                      job is still in `queue_map` when it fires, the job is failed with
                      TaskTimeout; a job that already left the queue is not touched.
                      """
                      if timeout_id != job.timeout_id:
                          # a newer timeout was scheduled since; this call is stale
                          return
                      now = time.time()
                      # NOTE(review): elsewhere in this class job.timeout is added to the
                      # current time as a duration, while here it is compared against an
                      # absolute timestamp - confirm which is intended.
                      if (now + 1) <= job.timeout:
                          self.log.warn("task %s timeout fired prematurely: %s > %s",
                              job.msg_id, job.timeout, now
                          )
                      still_queued = job.msg_id in self.queue_map
                      if still_queued:
                          # still waiting, but ran out of time
                          self.log.info("task %r timed out", job.msg_id)
                          self.fail_unreachable(job.msg_id, error.TaskTimeout)
                  def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
                      """Fail a queued task that can never run, and reply to its client.
                      The job is removed from `queue_map`, flagged for lazy deletion from the
                      queue, recorded as done and failed, and an 'apply_reply' wrapping an
                      instance of `why` is sent to the client and to the monitor stream.
                      Finally the graph is updated as for any other failed task.
                      Parameters
                      ----------
                      msg_id : the id of the task; must be a key of `queue_map`, otherwise
                          an error is logged and nothing else happens
                      why : exception class, called without arguments to build the error
                      """
                      if msg_id not in self.queue_map:
                          self.log.error("task %r already failed!", msg_id)
                          return
                      job = self.queue_map.pop(msg_id)
                      # lazy-delete from the queue
                      job.removed = True
                      for mid in job.dependents:
                          if mid in self.graph:
                              # discard rather than remove: a job failed right at submission
                              # was never added to the graph, and set.remove would raise
                              # KeyError before any reply reaches the client
                              self.graph[mid].discard(msg_id)
                      try:
                          raise why()
                      except Exception:
                          # wrap_exception is called inside the handler, while the
                          # exception is the one currently being handled
                          content = error.wrap_exception()
                      self.log.debug("task %r failing as unreachable with: %s", msg_id, content['ename'])
                      self.all_done.add(msg_id)
                      self.all_failed.add(msg_id)
                      msg = self.session.send(self.client_stream, 'apply_reply', content,
                                                              parent=job.header, ident=job.idents)
                      self.session.send(self.mon_stream, msg, ident=[b'outtask']+job.idents)
                      self.update_graph(msg_id, success=False)
                  def available_engines(self):
                      """List the indices of engines that may be handed another task.
                      With no high-water mark (falsy `hwm`) every engine index is returned;
                      otherwise only the indices whose load is below `hwm`.
                      """
                      positions = range(len(self.targets))
                      if not self.hwm:
                          return list(positions)
                      return [idx for idx in positions if self.loads[idx] < self.hwm]
                  def maybe_run(self, job):
                      """check location dependencies, and run if they are met.
                      Returns True when the job was submitted to an engine, False when it
                      was not. In the False case the job may additionally have been failed
                      as unreachable (its msg_id is then in `all_failed`).
                      """
                      msg_id = job.msg_id
                      self.log.debug("Attempting to assign task %s", msg_id)
                      available = self.available_engines()
                      if not available:
                          # no engines, definitely can't run
                          return False
                      if job.follow or job.targets or job.blacklist or self.hwm:
                          # we need a can_run filter
                          def can_run(idx):
                              # check hwm
                              if self.hwm and self.loads[idx] == self.hwm:
                                  return False
                              target = self.targets[idx]
                              # check blacklist
                              if target in job.blacklist:
                                  return False
                              # check targets
                              if job.targets and target not in job.targets:
                                  return False
                              # check follow
                              return job.follow.check(self.completed[target], self.failed[target])
                          indices = list(filter(can_run, available))
                          if not indices:
                              # couldn't run
                              if job.follow.all:
                                  # check follow for impossibility
                                  dests = set()
                                  relevant = set()
                                  if job.follow.success:
                                      relevant = self.all_completed
                                  if job.follow.failure:
                                      relevant = relevant.union(self.all_failed)
                                  # collect the engines where the relevant followed tasks ran
                                  for m in job.follow.intersection(relevant):
                                      dests.add(self.destinations[m])
                                  if len(dests) > 1:
                                      # followed tasks ran on more than one engine, so no
                                      # single engine can satisfy an all-type follow
                                      self.queue_map[msg_id] = job
                                      self.fail_unreachable(msg_id)
                                      return False
                              if job.targets:
                                  # check blacklist+targets for impossibility
                                  job.targets.difference_update(job.blacklist)
                                  if not job.targets or not job.targets.intersection(self.targets):
                                      self.queue_map[msg_id] = job
                                      self.fail_unreachable(msg_id)
                                      return False
                              return False
                      else:
                          # no restrictions: submit_task may pick among all engines
                          indices = None
                      self.submit_task(job, indices)
                      return True
                  def save_unmet(self, job):
                      """Save a job for later submission, once its dependencies are met.
                      The job is stored in `queue_map` and appended to `queue`, and its
                      msg_id is recorded in `graph` under every dependency that has not
                      finished yet. If the job has a timeout, a job_timeout callback is
                      scheduled on the loop that many seconds from now.
                      """
                      msg_id = job.msg_id
                      self.log.debug("Adding task %s to the queue", msg_id)
                      self.queue_map[msg_id] = job
                      self.queue.append(job)
                      # track the ids in follow or after, but not those already finished
                      for dep_id in job.after.union(job.follow).difference(self.all_done):
                          self.graph.setdefault(dep_id, set()).add(msg_id)
                      # schedule timeout callback
                      if job.timeout:
                          # bumping the id invalidates any callback scheduled earlier
                          timeout_id = job.timeout_id = job.timeout_id + 1
                          # the deadline has to be on the loop's own clock (loop.time()),
                          # which is not guaranteed to be time.time()
                          self.loop.add_timeout(self.loop.time() + job.timeout,
                              lambda : self.job_timeout(job, timeout_id)
                          )
                  def submit_task(self, job, indices=None):
                      """Send a job to one engine, chosen by the scheme among `indices`.
                      When `indices` is empty or None, the scheme chooses among all engines.
                      The chosen engine's load is updated, the job is recorded as pending
                      on it, and the destination is reported on the monitor stream.
                      """
                      if indices:
                          candidate_loads = [self.loads[i] for i in indices]
                      else:
                          candidate_loads = self.loads
                      choice = self.scheme(candidate_loads)
                      # map the position among the candidates back to an engine index
                      idx = indices[choice] if indices else choice
                      target = self.targets[idx]
                      # send job to the engine
                      engine_stream = self.engine_stream
                      engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
                      engine_stream.send_multipart(job.raw_msg, copy=False)
                      # update load
                      self.add_job(idx)
                      self.pending[target][job.msg_id] = job
                      # notify Hub
                      content = dict(msg_id=job.msg_id, engine_id=target.decode('ascii'))
                      self.session.send(self.mon_stream, 'task_destination', content=content,
                                      ident=[b'tracktask',self.ident])
                  #-----------------------------------------------------------------------
                  # Result Handling
                  #-----------------------------------------------------------------------
                  @util.log_errors
                  def dispatch_result(self, raw_msg):
                      """dispatch method for result replies
                      A reply whose metadata reports unmet dependencies, or a failure with
                      retries remaining, is handed to handle_unmet_dependency for
                      resubmission. Any other reply is a final result: it is relayed via
                      handle_result and forwarded to the monitor stream.
                      Parameters
                      ----------
                      raw_msg : list
                          the message frames of the reply; the first ident is the engine
                      """
                      try:
                          idents,msg = self.session.feed_identities(raw_msg, copy=False)
                          msg = self.session.unserialize(msg, content=False, copy=False)
                          engine = idents[0]
                          try:
                              idx = self.targets.index(engine)
                          except ValueError:
                              pass # skip load-update for dead engines
                          else:
                              # presumably lowers the engine's load again - finish_job is
                              # defined outside this view
                              self.finish_job(idx)
                      except Exception:
                          self.log.error("task::Invalid result: %r", raw_msg, exc_info=True)
                          return
                      md = msg['metadata']
                      parent = msg['parent_header']
                      # a missing 'dependencies_met' key counts as met
                      if md.get('dependencies_met', True):
                          success = (md['status'] == 'ok')
                          msg_id = parent['msg_id']
                          retries = self.retries[msg_id]
                          if not success and retries > 0:
                              # failed
                              # use up one retry and resubmit instead of reporting the failure
                              self.retries[msg_id] = retries - 1
                              self.handle_unmet_dependency(idents, parent)
                          else:
                              del self.retries[msg_id]
                              # relay to client and update graph
                              self.handle_result(idents, parent, raw_msg, success)
                              # send to Hub monitor
                              self.mon_stream.send_multipart([b'outtask']+raw_msg, copy=False)
                      else:
                          self.handle_unmet_dependency(idents, parent)
                  def handle_result(self, idents, parent, raw_msg, success=True):
                      """Relay a final task result to its client and record the outcome.
                      The first two frames of `raw_msg` are replaced in place by
                      [client, engine] before sending. The task is then removed from the
                      engine's pending jobs, recorded as completed or failed, and the graph
                      is updated.
                      """
                      engine, client = idents[0], idents[1]
                      # swap_ids for ROUTER-ROUTER mirror
                      raw_msg[:2] = [client,engine]
                      self.client_stream.send_multipart(raw_msg, copy=False)
                      # bookkeeping
                      msg_id = parent['msg_id']
                      self.pending[engine].pop(msg_id)
                      if success:
                          per_engine, overall = self.completed, self.all_completed
                      else:
                          per_engine, overall = self.failed, self.all_failed
                      per_engine[engine].add(msg_id)
                      overall.add(msg_id)
                      self.all_done.add(msg_id)
                      self.destinations[msg_id] = engine
                      self.update_graph(msg_id, success)
                  def handle_unmet_dependency(self, idents, parent):
                      """Process an engine's report that it could not run a task.

                      The reporting engine (idents[0]) is added to the job's blacklist.
                      If the blacklist then equals the job's target set, the job is
                      failed as unreachable; otherwise it is offered to maybe_run()
                      again and, if that does not place it and it has not been marked
                      failed, handed to save_unmet().
                      """
                      engine = idents[0]
                      msg_id = parent['msg_id']

                      job = self.pending[engine].pop(msg_id)
                      job.blacklist.add(engine)

                      if job.blacklist == job.targets:
                          # every allowed target has now refused this job
                          self.queue_map[msg_id] = job
                          self.fail_unreachable(msg_id)
                      elif not self.maybe_run(job) and msg_id not in self.all_failed:
                          # resubmit failed: put it back in our dependency tree
                          self.save_unmet(job)

                      if not self.hwm:
                          return
                      try:
                          idx = self.targets.index(engine)
                      except ValueError:
                          # skip load-update for dead engines
                          return
                      if self.loads[idx] == self.hwm - 1:
                          # this engine sits one below the high-water mark: recheck the whole queue
                          self.update_graph(None)
                  def update_graph(self, dep_id=None, success=True):
                      """dep_id just finished. Update our dependency
                      graph and submit any jobs that just became runnable.

                      Called with dep_id=None to update entire graph for hwm, but without finishing a task.

                      success is accepted for interface compatibility; this method does
                      not read it.

                      Jobs taken from self.queue that neither ran nor failed are put back
                      at the front of the queue in their original relative order.
                      """
                      # update any jobs that depended on the dependency
                      msg_ids = self.graph.pop(dep_id, [])

                      # recheck *all* jobs if
                      # a) we have HWM and an engine just become no longer full
                      # or b) dep_id was given as None
                      if dep_id is None or self.hwm and any(load == self.hwm - 1 for load in self.loads):
                          jobs = self.queue
                          using_queue = True
                      else:
                          using_queue = False
                          jobs = deque(sorted(self.queue_map[msg_id] for msg_id in msg_ids))

                      to_restore = []
                      while jobs:
                          job = jobs.popleft()
                          if job.removed:
                              continue
                          msg_id = job.msg_id

                          put_it_back = True

                          if job.after.unreachable(self.all_completed, self.all_failed)\
                                  or job.follow.unreachable(self.all_completed, self.all_failed):
                              self.fail_unreachable(msg_id)
                              put_it_back = False

                          elif job.after.check(self.all_completed, self.all_failed): # time deps met, maybe run
                              if self.maybe_run(job):
                                  put_it_back = False
                                  self.queue_map.pop(msg_id)
                                  for mid in job.dependents:
                                      if mid in self.graph:
                                          self.graph[mid].remove(msg_id)

                                  # abort the loop if we just filled up all of our engines.
                                  # avoids an O(N) operation in situation of full queue,
                                  # where graph update is triggered as soon as an engine becomes
                                  # non-full, and all tasks after the first are checked,
                                  # even though they can't run.
                                  if not self.available_engines():
                                      break

                          if using_queue and put_it_back:
                              # popped a job from the queue but it neither ran nor failed,
                              # so we need to put it back when we are done
                              to_restore.append(job)

                      # put back any tasks we popped but didn't run
                      if using_queue:
                          # deque.extendleft() pushes items one at a time and therefore
                          # reverses its argument; feed it reversed so the restored jobs
                          # keep the order they had before this pass.
                          self.queue.extendleft(reversed(to_restore))
                  #----------------------------------------------------------------------
                  # methods to be overridden by subclasses
                  #----------------------------------------------------------------------
                  def add_job(self, idx):
                      """Record that the engine at self.targets[idx] was just given a job.

                      Subclasses may override.  This default counts one more outstanding
                      job for that engine and then moves the engine (and its load) to the
                      end of the parallel targets/loads lists, giving a simple LRU order.
                      """
                      self.loads[idx] += 1
                      # rotate the entry at idx to the back of both lists, keeping them aligned
                      self.targets.append(self.targets.pop(idx))
                      self.loads.append(self.loads.pop(idx))
                  def finish_job(self, idx):
                      """Record that the engine at self.targets[idx] just finished a job.

                      Subclasses may override.  This default lowers that engine's load
                      count by one.
                      """
                      self.loads[idx] = self.loads[idx] - 1
              def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, reg_addr, config=None,
                                      logname='root', log_url=None, loglevel=logging.DEBUG,
                                      identity=b'task', in_thread=False):
                  """Create the scheduler's sockets, build a TaskScheduler and start it.

                  ROUTER sockets are bound at in_addr (client side) and out_addr (engine
                  side); a PUB socket connects to mon_addr, a SUB socket (subscribed to
                  everything) to not_addr, and a DEALER socket to reg_addr.

                  With in_thread=True the shared Context/IOLoop instances and the
                  Application's logger are used and the loop is not started here.
                  Otherwise a fresh Context and IOLoop are created, logging goes to
                  log_url if given (else a local logger), and the loop is started;
                  a KeyboardInterrupt raised from it is logged and swallowed.
                  """
                  if config:
                      # unwrap dict back into Config
                      config = Config(config)

                  if in_thread:
                      # use instance() to get the same Context/Loop as our parent
                      ctx, loop = zmq.Context.instance(), ioloop.IOLoop.instance()
                  else:
                      # in a process, don't use instance()
                      # for safety with multiprocessing
                      ctx, loop = zmq.Context(), ioloop.IOLoop()

                  def new_stream(socket_type):
                      # every stream below shares the same context and loop
                      return zmqstream.ZMQStream(ctx.socket(socket_type), loop)

                  client_stream = new_stream(zmq.ROUTER)
                  util.set_hwm(client_stream, 0)
                  client_stream.setsockopt(zmq.IDENTITY, identity + b'_in')
                  client_stream.bind(in_addr)

                  engine_stream = new_stream(zmq.ROUTER)
                  util.set_hwm(engine_stream, 0)
                  engine_stream.setsockopt(zmq.IDENTITY, identity + b'_out')
                  engine_stream.bind(out_addr)

                  monitor_stream = new_stream(zmq.PUB)
                  util.set_hwm(monitor_stream, 0)
                  monitor_stream.connect(mon_addr)

                  notifier_stream = new_stream(zmq.SUB)
                  notifier_stream.setsockopt(zmq.SUBSCRIBE, b'')
                  notifier_stream.connect(not_addr)

                  query_stream = new_stream(zmq.DEALER)
                  query_stream.connect(reg_addr)

                  # setup logging.
                  if in_thread:
                      log = Application.instance().log
                  elif log_url:
                      log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
                  else:
                      log = local_logger(logname, loglevel)

                  scheduler = TaskScheduler(client_stream=client_stream, engine_stream=engine_stream,
                                          mon_stream=monitor_stream, notifier_stream=notifier_stream,
                                          query_stream=query_stream,
                                          loop=loop, log=log,
                                          config=config)
                  scheduler.start()

                  if in_thread:
                      # the caller owns the shared loop
                      return
                  try:
                      loop.start()
                  except KeyboardInterrupt:
                      scheduler.log.critical("Interrupted, exiting...")

IPython/parallel/engine/engine.py

0 +6 -6

              """A simple engine that talks to a controller over 0MQ.
              It handles registration, etc., and launches a kernel
              connected to the Controller's Schedulers.
              """
              # Copyright (c) IPython Development Team.
              # Distributed under the terms of the Modified BSD License.
              from __future__ import print_function
              import sys
              import time
              from getpass import getpass
              import zmq
              from zmq.eventloop import ioloop, zmqstream
              from IPython.utils.localinterfaces import localhost
              from IPython.utils.traitlets import (
                  Instance, Dict, Integer, Type, Float, Unicode, CBytes, Bool
              )
              from IPython.utils.py3compat import cast_bytes
              from IPython.parallel.controller.heartmonitor import Heart
              from IPython.parallel.factory import RegistrationFactory
              from IPython.parallel.util import disambiguate_url
-             from IPython.kernel.zmq.session import Message
              from IPython.kernel.zmq.ipkernel import IPythonKernel as Kernel
              from IPython.kernel.zmq.kernelapp import IPKernelApp
              class EngineFactory(RegistrationFactory):
                  """IPython engine: registers with a controller and launches a kernel.

                  start() schedules register() on the event loop together with a
                  registration timeout that calls abort().  When the controller
                  replies, complete_registration() cancels that timeout and, if the
                  reply's status is 'ok', starts the heartbeat, connects the shell,
                  control and iopub sockets, and starts the kernel.
                  """

                  # configurables:

                  out_stream_factory=Type('IPython.kernel.zmq.iostream.OutStream', config=True,
                      help="""The OutStream for handling stdout/err.
                      Typically 'IPython.kernel.zmq.iostream.OutStream'""")
                  display_hook_factory=Type('IPython.kernel.zmq.displayhook.ZMQDisplayHook', config=True,
                      help="""The class for handling displayhook.
                      Typically 'IPython.kernel.zmq.displayhook.ZMQDisplayHook'""")
                  location=Unicode(config=True,
                      help="""The location (an IP address) of the controller.  This is
                      used for disambiguating URLs, to determine whether
                      loopback should be used to connect or the public address.""")
                  timeout=Float(5.0, config=True,
                      help="""The time (in seconds) to wait for the Controller to respond
                      to registration requests before giving up.""")
                  max_heartbeat_misses=Integer(50, config=True,
                      help="""The maximum number of times a check for the heartbeat ping of a
                      controller can be missed before shutting down the engine.
                      If set to 0, the check is disabled.""")
                  sshserver=Unicode(config=True,
                      help="""The SSH server to use for tunneling connections to the Controller.""")
                  sshkey=Unicode(config=True,
                      help="""The SSH private key file to use when tunneling connections to the Controller.""")
                  paramiko=Bool(sys.platform == 'win32', config=True,
                      help="""Whether to use paramiko instead of openssh for tunnels.""")

                  @property
                  def tunnel_mod(self):
                      """The zmq.ssh.tunnel module, imported on first use."""
                      from zmq.ssh import tunnel
                      return tunnel

                  # not configurable:
                  connection_info = Dict()
                  user_ns = Dict()
                  id = Integer(allow_none=True)
                  registrar = Instance('zmq.eventloop.zmqstream.ZMQStream')
                  kernel = Instance(Kernel)
                  hb_check_period=Integer()

                  # States for the heartbeat monitoring
                  # Initial values for monitored and pinged must satisfy "monitored > pinged == False" so that
                  # during the first check no "missed" ping is reported. Must be floats for Python 3 compatibility.
                  _hb_last_pinged = 0.0
                  _hb_last_monitored = 0.0
                  _hb_missed_beats = 0
                  # The zmq Stream which receives the pings from the Heart
                  _hb_listener = None
                  # Handle of the pending registration timeout (set by start(),
                  # cancelled by complete_registration()); None when none is pending
                  _abort_dc = None

                  bident = CBytes()
                  ident = Unicode()
                  def _ident_changed(self, name, old, new):
                      """Keep the bytes identity in sync whenever ident is assigned."""
                      self.bident = cast_bytes(new)
                  using_ssh=Bool(False)

                  def __init__(self, **kwargs):
                      """Initialise the factory and take the session id as our identity."""
                      super(EngineFactory, self).__init__(**kwargs)
                      self.ident = self.session.session

                  def init_connector(self):
                      """construct connection function, which handles tunnels."""
                      self.using_ssh = bool(self.sshkey or self.sshserver)

                      if self.sshkey and not self.sshserver:
                          # We are using ssh directly to the controller, tunneling localhost to localhost
                          self.sshserver = self.url.split('://')[1].split(':')[0]

                      if self.using_ssh:
                          if self.tunnel_mod.try_passwordless_ssh(self.sshserver, self.sshkey, self.paramiko):
                              password=False
                          else:
                              password = getpass("SSH Password for %s: "%self.sshserver)
                      else:
                          password = False

                      def connect(s, url):
                          url = disambiguate_url(url, self.location)
                          if self.using_ssh:
                              self.log.debug("Tunneling connection to %s via %s", url, self.sshserver)
                              return self.tunnel_mod.tunnel_connection(s, url, self.sshserver,
                                          keyfile=self.sshkey, paramiko=self.paramiko,
                                          password=password,
                              )
                          else:
                              return s.connect(url)

                      def maybe_tunnel(url):
                          """like connect, but don't complete the connection (for use by heartbeat)"""
                          url = disambiguate_url(url, self.location)
                          if self.using_ssh:
                              self.log.debug("Tunneling connection to %s via %s", url, self.sshserver)
                              url, tunnelobj = self.tunnel_mod.open_tunnel(url, self.sshserver,
                                          keyfile=self.sshkey, paramiko=self.paramiko,
                                          password=password,
                              )
                          return str(url)
                      return connect, maybe_tunnel

                  def register(self):
                      """send the registration_request"""

                      self.log.info("Registering with controller at %s"%self.url)
                      ctx = self.context
                      connect,maybe_tunnel = self.init_connector()
                      reg = ctx.socket(zmq.DEALER)
                      reg.setsockopt(zmq.IDENTITY, self.bident)
                      connect(reg, self.url)
                      self.registrar = zmqstream.ZMQStream(reg, self.loop)

                      content = dict(uuid=self.ident)
                      # the reply is handled by complete_registration, which needs the connectors
                      self.registrar.on_recv(lambda msg: self.complete_registration(msg, connect, maybe_tunnel))
                      self.session.send(self.registrar, "registration_request", content=content)

                  def _report_ping(self, msg):
                      """Callback for when the heartmonitor.Heart receives a ping"""
                      self._hb_last_pinged = time.time()

                  def complete_registration(self, msg, connect, maybe_tunnel):
                      """Handle the controller's reply to our registration request.

                      msg is the raw multipart reply; connect and maybe_tunnel are the
                      callables returned by init_connector().  If the reply's status is
                      'ok', the heartbeat, the shell/control/iopub connections and the
                      kernel are set up and started; otherwise the failure is logged
                      and an Exception is raised.
                      """
                      # the controller answered: cancel the pending abort() timeout
                      if self._abort_dc is not None:
                          self.loop.remove_timeout(self._abort_dc)
                          self._abort_dc = None
                      ctx = self.context
                      loop = self.loop
                      identity = self.bident
                      idents,msg = self.session.feed_identities(msg)
                      msg = self.session.unserialize(msg)
                      content = msg['content']
                      info = self.connection_info

                      def url(key):
                          """get zmq url for given channel"""
                          return str(info["interface"] + ":%i" % info[key])

                      if content['status'] == 'ok':
                          self.id = int(content['id'])

                          # launch heartbeat
                          # possibly forward hb ports with tunnels
                          hb_ping = maybe_tunnel(url('hb_ping'))
                          hb_pong = maybe_tunnel(url('hb_pong'))

                          hb_monitor = None
                          if self.max_heartbeat_misses > 0:
                              # Add a monitor socket which will record the last time a ping was seen
                              mon = self.context.socket(zmq.SUB)
                              mport = mon.bind_to_random_port('tcp://%s' % localhost())
                              mon.setsockopt(zmq.SUBSCRIBE, b"")
                              self._hb_listener = zmqstream.ZMQStream(mon, self.loop)
                              self._hb_listener.on_recv(self._report_ping)

                              hb_monitor = "tcp://%s:%i" % (localhost(), mport)

                          heart = Heart(hb_ping, hb_pong, hb_monitor , heart_id=identity)
                          heart.start()

                          # create Shell Connections (MUX, Task, etc.):
                          shell_addrs = url('mux'), url('task')

                          # Use only one shell stream for mux and tasks
                          stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
                          stream.setsockopt(zmq.IDENTITY, identity)
                          shell_streams = [stream]
                          for addr in shell_addrs:
                              connect(stream, addr)

                          # control stream:
                          control_addr = url('control')
                          control_stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
                          control_stream.setsockopt(zmq.IDENTITY, identity)
                          connect(control_stream, control_addr)

                          # create iopub stream:
                          iopub_addr = url('iopub')
                          iopub_socket = ctx.socket(zmq.PUB)
                          iopub_socket.setsockopt(zmq.IDENTITY, identity)
                          connect(iopub_socket, iopub_addr)

                          # disable history:
                          self.config.HistoryManager.hist_file = ':memory:'

                          # Redirect input streams and set a display hook.
                          if self.out_stream_factory:
                              sys.stdout = self.out_stream_factory(self.session, iopub_socket, u'stdout')
                              sys.stdout.topic = cast_bytes('engine.%i.stdout' % self.id)
                              sys.stderr = self.out_stream_factory(self.session, iopub_socket, u'stderr')
                              sys.stderr.topic = cast_bytes('engine.%i.stderr' % self.id)
                          if self.display_hook_factory:
                              sys.displayhook = self.display_hook_factory(self.session, iopub_socket)
                              sys.displayhook.topic = cast_bytes('engine.%i.execute_result' % self.id)

                          self.kernel = Kernel(parent=self, int_id=self.id, ident=self.ident, session=self.session,
                                  control_stream=control_stream, shell_streams=shell_streams, iopub_socket=iopub_socket,
                                  loop=loop, user_ns=self.user_ns, log=self.log)

                          self.kernel.shell.display_pub.topic = cast_bytes('engine.%i.displaypub' % self.id)

                          # periodically check the heartbeat pings of the controller
                          # Should be started here and not in "start()" so that the right period can be taken
                          # from the hubs HeartBeatMonitor.period
                          if self.max_heartbeat_misses > 0:
                              # Use a slightly bigger check period than the hub signal period to not warn unnecessary
                              self.hb_check_period = int(content['hb_period'])+10
                              self.log.info("Starting to monitor the heartbeat signal from the hub every %i ms." , self.hb_check_period)
                              self._hb_reporter = ioloop.PeriodicCallback(self._hb_monitor, self.hb_check_period, self.loop)
                              self._hb_reporter.start()
                          else:
                              self.log.info("Monitoring of the heartbeat signal from the hub is not enabled.")

                          # FIXME: This is a hack until IPKernelApp and IPEngineApp can be fully merged
                          app = IPKernelApp(parent=self, shell=self.kernel.shell, kernel=self.kernel, log=self.log)
                          app.init_profile_dir()
                          app.init_code()

                          self.kernel.start()
                      else:
                          self.log.fatal("Registration Failed: %s"%msg)
                          raise Exception("Registration Failed: %s"%msg)

                      self.log.info("Completed registration with id %i"%self.id)

                  def abort(self):
                      """Give up on registration: log, send an unregistration request, exit 255."""
                      self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
                      # self.url carries a scheme prefix (init_connector splits it on '://'),
                      # so test the host part rather than the raw URL for a 127.x address
                      if self.url.split('://', 1)[-1].startswith('127.'):
                          self.log.fatal("""
                          If the controller and engines are not on the same machine,
                          you will have to instruct the controller to listen on an external IP (in ipcontroller_config.py):
                              c.HubFactory.ip='*' # for all interfaces, internal and external
                              c.HubFactory.ip='192.168.1.101' # or any interface that the engines can see
                          or tunnel connections via ssh.
                          """)
                      self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
                      time.sleep(1)
                      sys.exit(255)

                  def _hb_monitor(self):
                      """Callback to monitor the heartbeat from the controller"""
                      self._hb_listener.flush()
                      if self._hb_last_monitored > self._hb_last_pinged:
                          # no ping arrived since the previous check
                          self._hb_missed_beats += 1
                          self.log.warn("No heartbeat in the last %s ms (%s time(s) in a row).", self.hb_check_period, self._hb_missed_beats)
                      else:
                          self._hb_missed_beats = 0

                      if self._hb_missed_beats >= self.max_heartbeat_misses:
                          self.log.fatal("Maximum number of heartbeats misses reached (%s times %s ms), shutting down.",
                                         self.max_heartbeat_misses, self.hb_check_period)
                          self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
                          self.loop.stop()

                      self._hb_last_monitored = time.time()

                  def start(self):
                      """Schedule registration, and its timeout, on the event loop.

                      The work is deferred with add_callback; the timeout that calls
                      abort() is armed self.timeout seconds after register() has sent
                      the request.
                      """
                      loop = self.loop
                      def _start():
                          self.register()
                          # keep the handle: complete_registration() needs it to cancel the timeout
                          self._abort_dc = loop.add_timeout(loop.time() + self.timeout, self.abort)
                      loop.add_callback(_start)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages