##// END OF EJS Templates
lock: fix race in lock-breaking code...
lock: fix race in lock-breaking code

With low frequency, I see hg pulls fail with output like:
abort: no such file or directory: .hg/store/lock

I think what happens is, in lock.py, in:

    def _testlock(self, locker):
        if not self._lockshouldbebroken(locker):
            return locker

        # if locker dead, break lock. must do this with another lock
        # held, or can race and break valid lock.
        try:
            with lock(self.vfs, self.f + b'.break', timeout=0):
                self.vfs.unlink(self.f)
        except error.LockError:
            return locker

if a lock is breakable on disk, and two hg processes concurrently get
to the "if locker dead" comment, a possible interleaving is: process1
finishes executing the function and then process2 finishes executing
the function. If that happens, process2 will either get ENOENT in
self.vfs.unlink (resulting in the spurious failure above), or break a
valid lock and potentially cause repository corruption.

The fix is simple enough: make sure the lock is breakable _inside_ the
critical section, because only then can we know that no other process
can invalidate our knowledge on the lock on disk.

I don't think there are tests for this. I've tested this manually
with:

diff --git a/mercurial/lock.py b/mercurial/lock.py
--- a/mercurial/lock.py
+++ b/mercurial/lock.py
@@ -351,6 +351,8 @@ class lock(object):
         if not self._lockshouldbebroken(locker):
             return locker
 
+        import random
+        time.sleep(1. + random.random())
         # if locker dead, break lock. must do this with another lock
         # held, or can race and break valid lock.
         try:
@@ -358,6 +360,7 @@ class lock(object):
                 self.vfs.unlink(self.f)
         except error.LockError:
             return locker
+        time.sleep(1)
 
     def testlock(self):
         """return id of locker if lock is valid, else None.

and I see this change of behavior before/after this commit:

  $ $hg init repo
  $ cd repo
  $ ln -s $HOSTNAME/effffffc:987654321 .hg/wlock
  $ touch a
  $ $hg commit -Am_ & $hg commit -Am _; wait
 -abort: No such file or directory: '/tmp/repo/.hg/wlock'
  adding a
 +warning: ignoring unknown working parent 679a8959a8ca!
 +nothing changed

Differential Revision: https://phab.mercurial-scm.org/D7199

File last commit:

r43347:687b865b default
r44108:039fbd14 default
Show More
dirstateguard.py
84 lines | 2.6 KiB | text/x-python | PythonLexer
# dirstateguard.py - class to allow restoring dirstate after failure
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
from .i18n import _
from . import (
error,
narrowspec,
util,
)
class dirstateguard(util.transactional):
    '''Roll the dirstate back if the guarded operation fails.

    On construction the guard:

    - writes the current ``repo.dirstate`` out, and
    - saves ``.hg/dirstate`` into a backup file

    (a matching working-copy narrowspec backup is saved as well).

    Calling ``release()`` without a prior ``close()`` restores
    ``.hg/dirstate`` from that backup.

    Calling ``close()`` first simply discards the backup, after which
    ``release()`` does nothing.
    '''

    def __init__(self, repo, name):
        '''Save dirstate and narrowspec backups, then mark the guard active.

        Both backup names embed ``name`` (bytes) and ``id(self)``.
        '''
        self._repo = repo
        self._active = False
        self._closed = False

        # one (name, id) pair feeds both backup file names
        suffix = (name, id(self))
        self._backupname = b'dirstate.backup.%s.%d' % suffix
        self._narrowspecbackupname = b'narrowspec.backup.%s.%d' % suffix

        repo.dirstate.savebackup(repo.currenttransaction(), self._backupname)
        narrowspec.savewcbackup(repo, self._narrowspecbackupname)

        # only flagged active once both backups have been saved
        self._active = True

    def __del__(self):
        '''Restore from backup if the guard is still active at deletion.'''
        if not self._active:
            return
        # still active: this may occur, even if this class is used
        # correctly: for example, releasing other resources like
        # transaction may raise exception before
        # ``dirstateguard.release`` in ``release(tr, ....)``.
        self._abort()

    def close(self):
        '''Discard the backups and mark the guard closed.

        Raises ``error.Abort`` if the guard is no longer active.
        '''
        if not self._active:  # already inactivated
            raise error.Abort(
                _(b"can't close already inactivated backup: %s")
                % self._backupname
            )

        repo = self._repo
        repo.dirstate.clearbackup(repo.currenttransaction(), self._backupname)
        narrowspec.clearwcbackup(repo, self._narrowspecbackupname)

        self._active = False
        self._closed = True

    def _abort(self):
        '''Restore narrowspec, then dirstate, and deactivate the guard.'''
        repo = self._repo
        narrowspec.restorewcbackup(repo, self._narrowspecbackupname)
        repo.dirstate.restorebackup(
            repo.currenttransaction(), self._backupname
        )
        self._active = False

    def release(self):
        '''Restore from backup unless ``close()`` already ran.

        Raises ``error.Abort`` if the guard is neither closed nor active.
        '''
        if self._closed:
            # close() already discarded the backups; nothing to undo
            return

        if not self._active:  # already inactivated
            raise error.Abort(
                _(b"can't release already inactivated backup: %s")
                % self._backupname
            )

        self._abort()