# HG changeset patch # User Yuya Nishihara # Date 2018-03-04 04:49:39 # Node ID d77c3b02339308f8f3abda27947cf9387aa1cbc8 # Parent e437de3881c14e984b2af1dd751245521d782dbf lock: block signal interrupt while making a lock file On Windows where symlink isn't supported, util.makelock() could leave an empty file if interrupted immediately after os.open(). This empty lock never dies as it has no process id recorded. ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL) # an interrupt may occur here os.write(ld, info) os.close(ld) This was a long-standing bug of TortoiseHg which runs a command-server and kills it by CTRL_C_EVENT, reported by random Windows users. https://bitbucket.org/tortoisehg/thg/issues/4873/#comment-43591129 At first, I tried to fix makelock() to clean up a stale lock file, which turned out to be hard because any instructions may be interrupted by a signal. ld = None try: # CALL_FUNCTION # os.open(...) # an interrupt may occur here # STORE_FAST # ld = ... ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL) os.write(ld, info) ... return True except: if ld: ... os.unlink(pathname) return False So I decided to block signals by temporarily replacing the signal handlers so makelock() and held = 1 will never be interrupted. Many thanks to Fernando Najera for investigating the issue. diff --git a/mercurial/lock.py b/mercurial/lock.py --- a/mercurial/lock.py +++ b/mercurial/lock.py @@ -10,6 +10,7 @@ from __future__ import absolute_import import contextlib import errno import os +import signal import socket import time import warnings @@ -39,6 +40,64 @@ def _getlockprefix(): raise return result +@contextlib.contextmanager +def _delayedinterrupt(): + """Block signal interrupt while doing something critical + + This makes sure that the code block wrapped by this context manager won't + be interrupted. 
+ + For Windows developers: It appears not possible to guard time.sleep() + from CTRL_C_EVENT, so please don't use time.sleep() to test if this is + working. + """ + assertedsigs = [] + blocked = False + orighandlers = {} + + def raiseinterrupt(num): + if (num == getattr(signal, 'SIGINT', None) or + num == getattr(signal, 'CTRL_C_EVENT', None)): + raise KeyboardInterrupt + else: + raise error.SignalInterrupt + def catchterm(num, frame): + if blocked: + assertedsigs.append(num) + else: + raiseinterrupt(num) + + try: + # save handlers first so they can be restored even if a setup is + # interrupted between signal.signal() and orighandlers[] =. + for name in ['CTRL_C_EVENT', 'SIGINT', 'SIGBREAK', 'SIGHUP', 'SIGTERM']: + num = getattr(signal, name, None) + if num and num not in orighandlers: + orighandlers[num] = signal.getsignal(num) + try: + for num in orighandlers: + signal.signal(num, catchterm) + except ValueError: + pass # in a thread? no luck + + blocked = True + yield + finally: + # no simple way to reliably restore all signal handlers because + # any loops, recursive function calls, except blocks, etc. can be + # interrupted. so instead, make catchterm() raise interrupt. + blocked = False + try: + for num, handler in orighandlers.items(): + signal.signal(num, handler) + except ValueError: + pass # in a thread? 
+ + # re-raise interrupt exception if any, which may be shadowed by a new + # interrupt occurred while re-raising the first one + if assertedsigs: + raiseinterrupt(assertedsigs[0]) + def trylock(ui, vfs, lockname, timeout, warntimeout, *args, **kwargs): """return an acquired lock or raise an a LockHeld exception @@ -182,8 +241,9 @@ class lock(object): while not self.held and retry: retry -= 1 try: - self.vfs.makelock(lockname, self.f) - self.held = 1 + with _delayedinterrupt(): + self.vfs.makelock(lockname, self.f) + self.held = 1 except (OSError, IOError) as why: if why.errno == errno.EEXIST: locker = self._readlock() diff --git a/mercurial/util.py b/mercurial/util.py --- a/mercurial/util.py +++ b/mercurial/util.py @@ -1676,6 +1676,11 @@ if safehasattr(time, "perf_counter"): timer = time.perf_counter def makelock(info, pathname): + """Create a lock file atomically if possible + + This may leave a stale lock file if symlink isn't supported and signal + interrupt is enabled. + """ try: return os.symlink(info, pathname) except OSError as why: