##// END OF EJS Templates
add secondary `debug` flag for heartbeats...
MinRK -
Show More
@@ -6,31 +6,27 b' restarts the kernel if it dies.'
6 It is an incomplete base class, and must be subclassed.
6 It is an incomplete base class, and must be subclassed.
7 """
7 """
8
8
9 #-----------------------------------------------------------------------------
9 # Copyright (c) IPython Development Team.
10 # Copyright (C) 2013 The IPython Development Team
10 # Distributed under the terms of the Modified BSD License.
11 #
12 # Distributed under the terms of the BSD License. The full license is in
13 # the file COPYING, distributed as part of this software.
14 #-----------------------------------------------------------------------------
15
16 #-----------------------------------------------------------------------------
17 # Imports
18 #-----------------------------------------------------------------------------
19
11
20 from IPython.config.configurable import LoggingConfigurable
12 from IPython.config.configurable import LoggingConfigurable
21 from IPython.utils.traitlets import (
13 from IPython.utils.traitlets import (
22 Instance, Float, Dict, Bool, Integer,
14 Instance, Float, Dict, Bool, Integer,
23 )
15 )
24
16
25 #-----------------------------------------------------------------------------
26 # Code
27 #-----------------------------------------------------------------------------
28
17
29 class KernelRestarter(LoggingConfigurable):
18 class KernelRestarter(LoggingConfigurable):
30 """Monitor and autorestart a kernel."""
19 """Monitor and autorestart a kernel."""
31
20
32 kernel_manager = Instance('IPython.kernel.KernelManager')
21 kernel_manager = Instance('IPython.kernel.KernelManager')
33
22
23 debug = Bool(False, config=True,
24 help="""Whether to include every poll event in debugging output.
25
26 Has to be set explicitly, because there will be *a lot* of output.
27 """
28 )
29
34 time_to_dead = Float(3.0, config=True,
30 time_to_dead = Float(3.0, config=True,
35 help="""Kernel heartbeat interval in seconds."""
31 help="""Kernel heartbeat interval in seconds."""
36 )
32 )
@@ -87,7 +83,8 b' class KernelRestarter(LoggingConfigurable):'
87 self.log.error("KernelRestarter: %s callback %r failed", event, callback, exc_info=True)
83 self.log.error("KernelRestarter: %s callback %r failed", event, callback, exc_info=True)
88
84
89 def poll(self):
85 def poll(self):
90 self.log.debug('Polling kernel...')
86 if self.debug:
87 self.log.debug('Polling kernel...')
91 if not self.kernel_manager.is_alive():
88 if not self.kernel_manager.is_alive():
92 if self._restarting:
89 if self._restarting:
93 self._restart_count += 1
90 self._restart_count += 1
@@ -2,17 +2,10 b''
2 """
2 """
3 A multi-heart Heartbeat system using PUB and ROUTER sockets. pings are sent out on the PUB,
3 A multi-heart Heartbeat system using PUB and ROUTER sockets. pings are sent out on the PUB,
4 and hearts are tracked based on their DEALER identities.
4 and hearts are tracked based on their DEALER identities.
5
6 Authors:
7
8 * Min RK
9 """
5 """
10 #-----------------------------------------------------------------------------
6
11 # Copyright (C) 2010-2011 The IPython Development Team
7 # Copyright (c) IPython Development Team.
12 #
8 # Distributed under the terms of the Modified BSD License.
13 # Distributed under the terms of the BSD License. The full license is in
14 # the file COPYING, distributed as part of this software.
15 #-----------------------------------------------------------------------------
16
9
17 from __future__ import print_function
10 from __future__ import print_function
18 import time
11 import time
@@ -24,7 +17,7 b' from zmq.eventloop import ioloop, zmqstream'
24
17
25 from IPython.config.configurable import LoggingConfigurable
18 from IPython.config.configurable import LoggingConfigurable
26 from IPython.utils.py3compat import str_to_bytes
19 from IPython.utils.py3compat import str_to_bytes
27 from IPython.utils.traitlets import Set, Instance, CFloat, Integer, Dict
20 from IPython.utils.traitlets import Set, Instance, CFloat, Integer, Dict, Bool
28
21
29 from IPython.parallel.util import log_errors
22 from IPython.parallel.util import log_errors
30
23
@@ -69,7 +62,13 b' class HeartMonitor(LoggingConfigurable):'
69 pingstream: a PUB stream
62 pingstream: a PUB stream
70 pongstream: an ROUTER stream
63 pongstream: an ROUTER stream
71 period: the period of the heartbeat in milliseconds"""
64 period: the period of the heartbeat in milliseconds"""
72
65
66 debug = Bool(False, config=True,
67 help="""Whether to include every heartbeat in debugging output.
68
69 Has to be set explicitly, because there will be *a lot* of output.
70 """
71 )
73 period = Integer(3000, config=True,
72 period = Integer(3000, config=True,
74 help='The frequency at which the Hub pings the engines for heartbeats '
73 help='The frequency at which the Hub pings the engines for heartbeats '
75 '(in ms)',
74 '(in ms)',
@@ -121,7 +120,8 b' class HeartMonitor(LoggingConfigurable):'
121 toc = time.time()
120 toc = time.time()
122 self.lifetime += toc-self.tic
121 self.lifetime += toc-self.tic
123 self.tic = toc
122 self.tic = toc
124 self.log.debug("heartbeat::sending %s", self.lifetime)
123 if self.debug:
124 self.log.debug("heartbeat::sending %s", self.lifetime)
125 goodhearts = self.hearts.intersection(self.responses)
125 goodhearts = self.hearts.intersection(self.responses)
126 missed_beats = self.hearts.difference(goodhearts)
126 missed_beats = self.hearts.difference(goodhearts)
127 newhearts = self.responses.difference(goodhearts)
127 newhearts = self.responses.difference(goodhearts)
@@ -181,7 +181,8 b' class HeartMonitor(LoggingConfigurable):'
181 last = str_to_bytes(str(self.last_ping))
181 last = str_to_bytes(str(self.last_ping))
182 if msg[1] == current:
182 if msg[1] == current:
183 delta = time.time()-self.tic
183 delta = time.time()-self.tic
184 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
184 if self.debug:
185 self.log.debug("heartbeat::heart %r took %.2f ms to respond", msg[0], 1000*delta)
185 self.responses.add(msg[0])
186 self.responses.add(msg[0])
186 elif msg[1] == last:
187 elif msg[1] == last:
187 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
188 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
General Comments 0
You need to be logged in to leave comments. Login now