##// END OF EJS Templates
strip: switch to mergestate.clean()...
strip: switch to mergestate.clean() See the previous patches for why we're doing this.

File last commit:

r26900:d1c74164 default
r26988:7e38d49b default
Show More
verify.py
352 lines | 11.8 KiB | text/x-python | PythonLexer
Matt Mackall
Move repo.verify
r2778 # verify.py - repository integrity checking for Mercurial
#
Thomas Arendsen Hein
Updated copyright notices and add "and others" to "hg version"
r4635 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
Matt Mackall
Move repo.verify
r2778 #
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Move repo.verify
r2778
Gregory Szorc
verify: use absolute_import
r25991 from __future__ import absolute_import
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
r17860 import os
Gregory Szorc
verify: use absolute_import
r25991
from .i18n import _
from .node import (
nullid,
short,
)
from . import (
error,
revlog,
util,
)
Matt Mackall
Move repo.verify
r2778
def verify(repo):
    """Run the integrity check on ``repo`` while holding its lock.

    The lock is taken with ``repo.lock()`` before any checking starts and
    is released explicitly once ``_verify`` is done, whether it returned
    or raised.

    Returns whatever ``_verify(repo)`` returns.
    """
    held = repo.lock()
    try:
        outcome = _verify(repo)
    finally:
        # release explicitly instead of leaving it to garbage collection
        held.release()
    return outcome
Matt Mackall
Use try/finally pattern to cleanup locks and transactions
r4915
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
r17860 def _normpath(f):
# under hg < 2.4, convert didn't sanitize paths properly, so a
# converted repo may contain repeated slashes
while '//' in f:
f = f.replace('//', '/')
return f
Augie Fackler
verify: add a hook that can let extensions manipulate file lists...
r26900 def _validpath(repo, path):
"""Returns False if a path should NOT be treated as part of a repo.
For all in-core cases, this returns True, as we have no way for a
path to be mentioned in the history but not actually be
relevant. For narrow clones, this is important because many
filelogs will be missing, and changelog entries may mention
modified files that are outside the narrow scope.
"""
return True
Matt Mackall
Use try/finally pattern to cleanup locks and transactions
def _verify(repo):
    """Check the integrity of ``repo`` and report findings through its ui.

    The check runs against the unfiltered repository and proceeds in
    phases: changelog entries, manifest entries, a cross-check between
    the files named in changesets and in manifests, and finally every
    filelog (revision linkage, unpacked size, copy/rename sources and
    agreement with the store's file list).

    Problems are printed with ``ui.warn`` as they are found; a summary is
    printed at the end.

    Raises ``error.Abort`` when ``repo.url()`` does not start with
    ``'file:'`` (bundle or remote repositories).

    Returns 1 when at least one integrity error was recorded. When no
    error was recorded the function falls off the end (``None``).
    """
    repo = repo.unfiltered()
    mflinkrevs = {}     # manifest node -> changelog revs referring to it
    filelinkrevs = {}   # normalized path -> changelog revs listing it
    filenodes = {}      # normalized path -> {file node: manifest linkrev}
    revisions = 0
    badrevs = set()
    # one-element lists so the nested helpers can update the counters
    errors = [0]
    warnings = [0]
    ui = repo.ui
    cl = repo.changelog
    mf = repo.manifest
    lrugetctx = util.lrucachefunc(repo.changectx)

    if not repo.url().startswith('file:'):
        raise error.Abort(_("cannot verify bundle or remote repos"))

    def err(linkrev, msg, filename=None):
        """Print an integrity error and count it.

        A ``linkrev`` other than None is remembered in ``badrevs``; None
        is shown as '?'.
        """
        if linkrev is not None:
            badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        ui.warn(" " + msg + "\n")
        errors[0] += 1

    def exc(linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an integrity error.

        Must be called from inside an ``except`` block: a
        KeyboardInterrupt is re-raised with a bare ``raise``.
        """
        if isinstance(inst, KeyboardInterrupt):
            ui.warn(_("interrupted"))
            raise
        # some exceptions have an empty str(); fall back to repr()
        if not str(inst):
            inst = repr(inst)
        err(linkrev, "%s: %s" % (msg, inst), filename)

    def warn(msg):
        """Print a warning and count it."""
        ui.warn(msg + "\n")
        warnings[0] += 1

    def checklog(obj, name, linkrev):
        """Check emptiness, on-disk size and format version of a revlog."""
        if not len(obj) and (havecl or havemf):
            err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            err(None, _("index contains %d extra bytes") % d[1], name)

        # every revlog is expected to use the same format as the changelog
        if obj.version != revlog.REVLOGV0:
            if not revlogv1:
                warn(_("warning: `%s' uses revlog format 1") % name)
        elif revlogv1:
            warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(obj, i, node, seen, linkrevs, f):
        """Check linkrev, parents and uniqueness of one revlog entry.

        ``seen`` maps already visited nodes to their revision number and
        is updated in place. Returns the entry's linkrev, or None when the
        stored linkrev cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(cl):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered candidates
                        pass
                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                err(lr, _("unknown parent 1 %s of %s") %
                    (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                err(lr, _("unknown parent 2 %s of %s") %
                    (short(p2), short(node)), f)
        except Exception as inst:
            exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    if os.path.exists(repo.sjoin("journal")):
        ui.warn(_("abandoned transaction found - run hg recover\n"))

    revlogv1 = cl.version != revlog.REVLOGV0
    if ui.verbose or not revlogv1:
        ui.status(_("repository uses revlog format %d\n") %
                  (1 if revlogv1 else 0))

    havecl = len(cl) > 0
    havemf = len(mf) > 0

    # ---- changesets ----
    ui.status(_("checking changesets\n"))
    refersmf = False
    seen = {}
    checklog(cl, "changelog", 0)
    total = len(repo)
    for i in repo:
        ui.progress(_('checking'), i, total=total, unit=_('changesets'))
        n = cl.node(i)
        checkentry(cl, i, n, seen, [i], "changelog")

        try:
            changes = cl.read(n)
            if changes[0] != nullid:
                mflinkrevs.setdefault(changes[0], []).append(i)
                refersmf = True
            for f in changes[3]:
                if _validpath(repo, f):
                    filelinkrevs.setdefault(_normpath(f), []).append(i)
        except Exception as inst:
            # an unreadable changeset may refer to a manifest
            refersmf = True
            exc(i, _("unpacking changeset %s") % short(n), inst)
    ui.progress(_('checking'), None)

    # ---- manifests ----
    ui.status(_("checking manifests\n"))
    seen = {}
    if refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        checklog(mf, "manifest", 0)
    total = len(mf)
    for i in mf:
        ui.progress(_('checking'), i, total=total, unit=_('manifests'))
        n = mf.node(i)
        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
        if n in mflinkrevs:
            # accounted for; whatever stays in mflinkrevs is unknown
            del mflinkrevs[n]
        else:
            err(lr, _("%s not in changesets") % short(n), "manifest")

        try:
            for f, fn in mf.readdelta(n).iteritems():
                if not f:
                    err(lr, _("file without name in manifest"))
                elif f != "/dev/null": # ignore this in very old repos
                    if _validpath(repo, f):
                        filenodes.setdefault(
                            _normpath(f), {}).setdefault(fn, lr)
        except Exception as inst:
            exc(lr, _("reading manifest delta %s") % short(n), inst)
    ui.progress(_('checking'), None)

    # ---- changesets vs. manifests ----
    ui.status(_("crosschecking files in changesets and manifests\n"))

    total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
    count = 0
    if havemf:
        for c, m in sorted([(c, m) for m in mflinkrevs
                            for c in mflinkrevs[m]]):
            count += 1
            if m == nullid:
                continue
            ui.progress(_('crosschecking'), count, total=total)
            err(c, _("changeset refers to unknown manifest %s") % short(m))
        mflinkrevs = None # del is bad here due to scope issues

        for f in sorted(filelinkrevs):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                err(lr, _("in changeset but not in manifest"), f)

    if havecl:
        for f in sorted(filenodes):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filelinkrevs:
                try:
                    fl = repo.file(f)
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                except Exception:
                    # no usable linkrev; err() will print '?'
                    lr = None
                err(lr, _("in manifest but not in changeset"), f)

    ui.progress(_('crosschecking'), None)

    # ---- files ----
    ui.status(_("checking files\n"))

    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            err(None, _("cannot decode filename '%s'") % f2)
        elif size > 0 or not revlogv1:
            storefiles.add(_normpath(f))

    fncachewarned = False
    files = sorted(set(filenodes) | set(filelinkrevs))
    total = len(files)
    for i, f in enumerate(files):
        ui.progress(_('checking'), i, item=f, total=total)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.RevlogError as e:
            err(lr, _("broken revlog! (%s)") % e, f)
            continue

        # tick off the store files backing this filelog; anything left in
        # storefiles after the loop is reported as an orphan
        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                warn(_(" warning: revlog '%s' not in fncache!") % ff)
                fncachewarned = True

        checklog(fl, f, lr)
        seen = {}
        for rev in fl:
            # Reset for every revision: if reading the data below fails,
            # the rename check must not run against copy information left
            # over from the previous revision.
            rp = None
            revisions += 1
            n = fl.node(rev)
            lr = checkentry(fl, rev, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    err(lr, _("%s not in manifests") % (short(n)), f)
                else:
                    del filenodes[f][n]

            # verify contents
            try:
                datalen = len(fl.read(n))
                rp = fl.renamed(n)
                if datalen != fl.size(rev):
                    # re-check against the full revision text before
                    # complaining -- presumably for data that starts with
                    # the bytes 01 0a; TODO confirm
                    if len(fl.revision(n)) != fl.size(rev):
                        err(lr, _("unpacked size is %s, %s expected") %
                            (datalen, fl.size(rev)), f)
            except error.CensoredNodeError:
                # experimental config: censor.policy
                if ui.config("censor", "policy", "abort") == "abort":
                    err(lr, _("censored file data"), f)
            except Exception as inst:
                exc(lr, _("unpacking %s") % short(n), inst, f)

            # check renames
            try:
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        found = False
                        for pctx in ctx.parents():
                            if rp[0] in pctx:
                                found = True
                                break
                        if not found:
                            warn(_("warning: copy source of '%s' not"
                                   " in parents of %s") % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        err(lr, _("empty or missing copy source revlog %s:%s")
                            % (rp[0], short(rp[1])), f)
                    elif rp[1] == nullid:
                        ui.note(_("warning: %s@%s: copy source"
                                  " revision is nullid %s:%s\n")
                                % (f, lr, rp[0], short(rp[1])))
                    else:
                        # called only for its lookup; a failure is
                        # reported by the handler below
                        fl2.rev(rp[1])
            except Exception as inst:
                exc(lr, _("checking rename of %s") % short(n), inst, f)

        # cross-check
        if f in filenodes:
            # nodes still listed were in manifests but not in the filelog
            fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
            for lr, node in sorted(fns):
                err(lr, _("%s in manifests not found") % short(node), f)
    ui.progress(_('checking'), None)

    for f in storefiles:
        warn(_("warning: orphan revlog '%s'") % f)

    # ---- summary ----
    ui.status(_("%d files, %d changesets, %d total revisions\n") %
              (len(files), len(cl), revisions))
    if warnings[0]:
        ui.warn(_("%d warnings encountered!\n") % warnings[0])
    if fncachewarned:
        ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                  'corrupt fncache\n'))
    if errors[0]:
        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
        if badrevs:
            ui.warn(_("(first damaged changeset appears to be %d)\n")
                    % min(badrevs))
        return 1