##// END OF EJS Templates
change how failed restarts are detected...
MinRK -
Show More
@@ -1,101 +1,114 b''
1 """A basic kernel monitor with autorestarting.
1 """A basic kernel monitor with autorestarting.
2
2
3 This watches a kernel's state using KernelManager.is_alive and auto
3 This watches a kernel's state using KernelManager.is_alive and auto
4 restarts the kernel if it dies.
4 restarts the kernel if it dies.
5
5
6 It is an incomplete base class, and must be subclassed.
6 It is an incomplete base class, and must be subclassed.
7 """
7 """
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Copyright (C) 2013 The IPython Development Team
10 # Copyright (C) 2013 The IPython Development Team
11 #
11 #
12 # Distributed under the terms of the BSD License. The full license is in
12 # Distributed under the terms of the BSD License. The full license is in
13 # the file COPYING, distributed as part of this software.
13 # the file COPYING, distributed as part of this software.
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15
15
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17 # Imports
17 # Imports
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19
19
20 from IPython.config.configurable import LoggingConfigurable
20 from IPython.config.configurable import LoggingConfigurable
21 from IPython.utils.traitlets import (
21 from IPython.utils.traitlets import (
22 Instance, Float, Dict, Bool,
22 Instance, Float, Dict, Bool, Integer,
23 )
23 )
24
24
25 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
26 # Code
26 # Code
27 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
28
28
class KernelRestarter(LoggingConfigurable):
    """Monitor and autorestart a kernel.

    ``poll`` asks ``kernel_manager.is_alive()`` whether the kernel is running.
    If it is not, the kernel is restarted, up to ``restart_limit`` consecutive
    times; after that the kernel is presumed dead, the 'dead' callbacks are
    fired, and polling is stopped.

    This is an incomplete base class: ``start`` and ``stop`` must be provided
    by a subclass, which is also responsible for calling ``poll`` periodically.
    """

    kernel_manager = Instance('IPython.kernel.KernelManager')

    time_to_dead = Float(3.0, config=True,
        help="""Kernel heartbeat interval in seconds."""
    )

    restart_limit = Integer(5, config=True,
        help="""The number of consecutive autorestarts before the kernel is presumed dead."""
    )

    # True from the moment a restart is issued until a later poll finds the
    # kernel alive again (or the restarter gives up).
    _restarting = Bool(False)
    # Number of restart attempts in the current streak of failed polls.
    _restart_count = Integer(0)

    # Maps an event name ('restart' or 'dead') to its list of callbacks.
    callbacks = Dict()
    def _callbacks_default(self):
        """Return the initial callback table: one empty list per event."""
        return dict(restart=[], dead=[])

    def start(self):
        """Start the polling of the kernel."""
        raise NotImplementedError("Must be implemented in a subclass")

    def stop(self):
        """Stop the kernel polling."""
        raise NotImplementedError("Must be implemented in a subclass")

    def add_callback(self, f, event='restart'):
        """register a callback to fire on a particular event

        Possible values for event:

          'restart' (default): kernel has died, and will be restarted.
          'dead': restart has failed, kernel will be left dead.

        The callback is invoked with no arguments.  An event name that is not
        a key of ``self.callbacks`` raises KeyError.
        """
        self.callbacks[event].append(f)

    def remove_callback(self, f, event='restart'):
        """unregister a callback to fire on a particular event

        Possible values for event:

          'restart' (default): kernel has died, and will be restarted.
          'dead': restart has failed, kernel will be left dead.

        Removing a callback that was never registered is a no-op.  An event
        name that is not a key of ``self.callbacks`` raises KeyError.
        """
        try:
            self.callbacks[event].remove(f)
        except ValueError:
            # f was not registered for this event; nothing to undo.
            pass

    def _fire_callbacks(self, event):
        """fire our callbacks for a particular event

        A callback that raises is logged and does not prevent the remaining
        callbacks from running.
        """
        # Iterate over a snapshot so a callback may unregister itself (or
        # register another) without disturbing this loop.
        for callback in list(self.callbacks[event]):
            try:
                callback()
            except Exception:
                self.log.error("KernelRestarter: %s callback %r failed", event, callback, exc_info=True)

    def poll(self):
        """Check the kernel once and restart it if it has died.

        While the kernel keeps being found dead on consecutive polls, up to
        ``restart_limit`` restarts are attempted.  If it is still dead after
        that, the 'dead' callbacks are fired, the counters are reset and
        ``stop()`` is called.
        """
        self.log.debug('Polling kernel...')
        if not self.kernel_manager.is_alive():
            if self._restarting:
                # The previous restart did not bring the kernel back.
                self._restart_count += 1
            else:
                # First failure of a new streak.
                self._restart_count = 1

            # Strictly greater: attempt number `restart_limit` must still be
            # made.  With `>=` only restart_limit - 1 restarts would happen,
            # and restart_limit=1 would never restart at all.
            if self._restart_count > self.restart_limit:
                self.log.warning("KernelRestarter: restart failed")
                self._fire_callbacks('dead')
                self._restarting = False
                self._restart_count = 0
                self.stop()
            else:
                self.log.info('KernelRestarter: restarting kernel (%i/%i)',
                    self._restart_count,
                    self.restart_limit
                )
                self._fire_callbacks('restart')
                self.kernel_manager.restart_kernel(now=True)
                self._restarting = True
        else:
            if self._restarting:
                self.log.debug("KernelRestarter: restart apparently succeeded")
                self._restarting = False
General Comments 0
You need to be logged in to leave comments. Login now