##// END OF EJS Templates
merge with stable
merge with stable

File last commit:

r14872:15afa498 default
r14878:ddc4567a merge default
Show More
verify.py
305 lines | 10.2 KiB | text/x-python | PythonLexer
Matt Mackall
Move repo.verify
r2778 # verify.py - repository integrity checking for Mercurial
#
Thomas Arendsen Hein
Updated copyright notices and add "and others" to "hg version"
r4635 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
Matt Mackall
Move repo.verify
r2778 #
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Move repo.verify
r2778
Joel Rosdahl
Expand import * to allow Pyflakes to find problems
r6211 from node import nullid, short
Matt Mackall
Simplify i18n imports
r3891 from i18n import _
Sune Foldager
verify: report existence of journal
r9690 import os
Benoit Boissinot
verify: do not abort on fully corrupted revlog
r7832 import revlog, util, error
Matt Mackall
Move repo.verify
r2778
def verify(repo):
Matt Mackall
Use try/finally pattern to cleanup locks and transactions
r4915 lock = repo.lock()
try:
return _verify(repo)
finally:
Ronny Pfannschmidt
switch lock releasing in the core from gc to explicit
r8109 lock.release()
Matt Mackall
Use try/finally pattern to cleanup locks and transactions
r4915
def _verify(repo):
Matt Mackall
verify: lots of refactoring...
r6752 mflinkrevs = {}
Matt Mackall
Move repo.verify
r2778 filelinkrevs = {}
filenodes = {}
Matt Mackall
verify: lots of refactoring...
r6752 revisions = 0
Benoit Boissinot
verify: use set instead of dict
r8466 badrevs = set()
Matt Mackall
Move repo.verify
r2778 errors = [0]
warnings = [0]
Matt Mackall
verify: add some local variables
r6751 ui = repo.ui
cl = repo.changelog
mf = repo.manifest
Matt Mackall
verify: filter the candidate list for broken linkrevs
r9657 lrugetctx = util.lrucachefunc(repo.changectx)
Matt Mackall
Move repo.verify
r2778
John Mulligan
issue 1144: prevent traceback on verify of bundles
r7141 if not repo.cancopy():
raise util.Abort(_("cannot verify bundle or remote repos"))
Matt Mackall
verify: report first bad changeset...
r5313 def err(linkrev, msg, filename=None):
Martin Geisler
code style: prefer 'is' and 'is not' tests with singletons
r13031 if linkrev is not None:
Benoit Boissinot
verify: use set instead of dict
r8466 badrevs.add(linkrev)
Matt Mackall
verify: report first bad changeset...
r5313 else:
Matt Mackall
verify: lots of refactoring...
r6752 linkrev = '?'
Matt Mackall
verify: report first bad changeset...
r5313 msg = "%s: %s" % (linkrev, msg)
if filename:
msg = "%s@%s" % (filename, msg)
Matt Mackall
verify: add some local variables
r6751 ui.warn(" " + msg + "\n")
Matt Mackall
Move repo.verify
r2778 errors[0] += 1
Matt Mackall
verify: lots of refactoring...
r6752 def exc(linkrev, msg, inst, filename=None):
if isinstance(inst, KeyboardInterrupt):
ui.warn(_("interrupted"))
raise
Matt Mackall
verify: report exceptions that don't have a str() method
r11752 if not str(inst):
inst = repr(inst)
Matt Mackall
verify: lots of refactoring...
r6752 err(linkrev, "%s: %s" % (msg, inst), filename)
Matt Mackall
Move repo.verify
r2778 def warn(msg):
Matt Mackall
verify: add some local variables
r6751 ui.warn(msg + "\n")
Matt Mackall
Move repo.verify
r2778 warnings[0] += 1
Benoit Boissinot
verify: reference the correct linkrev when a filelog is missing...
r8292 def checklog(obj, name, linkrev):
Matt Mackall
verify: lots of refactoring...
r6752 if not len(obj) and (havecl or havemf):
Benoit Boissinot
verify: reference the correct linkrev when a filelog is missing...
r8292 err(linkrev, _("empty or missing %s") % name)
Matt Mackall
verify: lots of refactoring...
r6752 return
Matt Mackall
Move repo.verify
r2778 d = obj.checksize()
if d[0]:
Matt Mackall
verify: report first bad changeset...
r5313 err(None, _("data length off by %d bytes") % d[0], name)
Matt Mackall
Move repo.verify
r2778 if d[1]:
Matt Mackall
verify: report first bad changeset...
r5313 err(None, _("index contains %d extra bytes") % d[1], name)
Matt Mackall
Move repo.verify
r2778
if obj.version != revlog.REVLOGV0:
if not revlogv1:
warn(_("warning: `%s' uses revlog format 1") % name)
elif revlogv1:
warn(_("warning: `%s' uses revlog format 0") % name)
Matt Mackall
verify: lots of refactoring...
r6752 def checkentry(obj, i, node, seen, linkrevs, f):
Matt Mackall
linkrev: take a revision number rather than a hash
r7361 lr = obj.linkrev(obj.rev(node))
Matt Mackall
verify: lots of refactoring...
r6752 if lr < 0 or (havecl and lr not in linkrevs):
if lr < 0 or lr >= len(cl):
Wagner Bruna
verify, i18n: fix unmarked strings
r7926 msg = _("rev %d points to nonexistent changeset %d")
else:
msg = _("rev %d points to unexpected changeset %d")
err(None, msg % (i, lr), f)
Matt Mackall
verify: lots of refactoring...
r6752 if linkrevs:
Matt Mackall
verify: filter the candidate list for broken linkrevs
r9657 if f and len(linkrevs) > 1:
try:
# attempt to filter down to real linkrevs
linkrevs = [l for l in linkrevs
if lrugetctx(l)[f].filenode() == node]
except:
pass
Martin Geisler
coding style: use a space after comma...
r9198 warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
Matt Mackall
verify: lots of refactoring...
r6752 lr = None # can't be trusted
try:
p1, p2 = obj.parents(node)
if p1 not in seen and p1 != nullid:
err(lr, _("unknown parent 1 %s of %s") %
(short(p1), short(n)), f)
if p2 not in seen and p2 != nullid:
err(lr, _("unknown parent 2 %s of %s") %
(short(p2), short(p1)), f)
except Exception, inst:
exc(lr, _("checking parents of %s") % short(node), inst, f)
if node in seen:
err(lr, _("duplicate revision %d (%d)") % (i, seen[n]), f)
seen[n] = i
return lr
Sune Foldager
verify: report existence of journal
r9690 if os.path.exists(repo.sjoin("journal")):
ui.warn(_("abandoned transaction found - run hg recover\n"))
Matt Mackall
verify: add some local variables
r6751 revlogv1 = cl.version != revlog.REVLOGV0
if ui.verbose or not revlogv1:
ui.status(_("repository uses revlog format %d\n") %
Matt Mackall
Move repo.verify
r2778 (revlogv1 and 1 or 0))
Matt Mackall
verify: lots of refactoring...
r6752 havecl = len(cl) > 0
havemf = len(mf) > 0
Matt Mackall
verify: add some local variables
r6751 ui.status(_("checking changesets\n"))
Matt Mackall
verify: lots of refactoring...
r6752 seen = {}
Benoit Boissinot
verify: reference the correct linkrev when a filelog is missing...
r8292 checklog(cl, "changelog", 0)
Augie Fackler
verify: call ui.progress()
r10433 total = len(repo)
Matt Mackall
add __len__ and __iter__ methods to repo and revlog
r6750 for i in repo:
timeless
verify/progress: using gerund to indicate action and adding units
r12745 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
Matt Mackall
verify: add some local variables
r6751 n = cl.node(i)
Matt Mackall
verify: lots of refactoring...
r6752 checkentry(cl, i, n, seen, [i], "changelog")
Matt Mackall
Move repo.verify
r2778
try:
Matt Mackall
verify: add some local variables
r6751 changes = cl.read(n)
Matt Mackall
verify: lots of refactoring...
r6752 mflinkrevs.setdefault(changes[0], []).append(i)
for f in changes[3]:
filelinkrevs.setdefault(f, []).append(i)
Matt Mackall
Move repo.verify
r2778 except Exception, inst:
Matt Mackall
verify: lots of refactoring...
r6752 exc(i, _("unpacking changeset %s") % short(n), inst)
timeless
verify/progress: using gerund to indicate action and adding units
r12745 ui.progress(_('checking'), None)
Matt Mackall
Move repo.verify
r2778
Matt Mackall
verify: lots of refactoring...
r6752 ui.status(_("checking manifests\n"))
Matt Mackall
Move repo.verify
r2778 seen = {}
Benoit Boissinot
verify: reference the correct linkrev when a filelog is missing...
r8292 checklog(mf, "manifest", 0)
Augie Fackler
verify: call ui.progress()
r10433 total = len(mf)
Matt Mackall
verify: add some local variables
r6751 for i in mf:
timeless
verify/progress: using gerund to indicate action and adding units
r12745 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
Matt Mackall
verify: add some local variables
r6751 n = mf.node(i)
Matt Mackall
verify: lots of refactoring...
r6752 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
if n in mflinkrevs:
del mflinkrevs[n]
Peter Arrenbrecht
verify: detect manifest revs not in any changeset
r8394 else:
err(lr, _("%s not in changesets") % short(n), "manifest")
Matt Mackall
Move repo.verify
r2778
try:
Matt Mackall
verify: add some local variables
r6751 for f, fn in mf.readdelta(n).iteritems():
Matt Mackall
verify: lots of refactoring...
r6752 if not f:
err(lr, _("file without name in manifest"))
elif f != "/dev/null":
Matt Mackall
verify: filter the candidate list for broken linkrevs
r9657 filenodes.setdefault(f, {}).setdefault(fn, lr)
Matt Mackall
Move repo.verify
r2778 except Exception, inst:
Matt Mackall
verify: lots of refactoring...
r6752 exc(lr, _("reading manifest delta %s") % short(n), inst)
timeless
verify/progress: using gerund to indicate action and adding units
r12745 ui.progress(_('checking'), None)
Matt Mackall
Move repo.verify
r2778
Matt Mackall
verify: add some local variables
r6751 ui.status(_("crosschecking files in changesets and manifests\n"))
Matt Mackall
Move repo.verify
r2778
Augie Fackler
verify: call ui.progress()
r10433 total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
count = 0
Matt Mackall
verify: lots of refactoring...
r6752 if havemf:
Matt Mackall
many, many trivial check-code fixups
r10282 for c, m in sorted([(c, m) for m in mflinkrevs
for c in mflinkrevs[m]]):
Augie Fackler
verify: call ui.progress()
r10433 count += 1
Matt Mackall
verify: filter messages about missing null manifests (issue2900)
r14865 if m == nullid:
continue
Martin Geisler
progress: use a verb (+noun) in present participle
r10698 ui.progress(_('crosschecking'), count, total=total)
Matt Mackall
verify: improve handling of empty or missing files...
r5541 err(c, _("changeset refers to unknown manifest %s") % short(m))
Alejandro Santos
verify: fix scope issues with del statement
r9033 mflinkrevs = None # del is bad here due to scope issues
Matt Mackall
Move repo.verify
r2778
Matt Mackall
replace util.sort with sorted built-in...
r8209 for f in sorted(filelinkrevs):
Augie Fackler
verify: call ui.progress()
r10433 count += 1
Matt Mackall
verify: filter messages about missing null manifests (issue2900)
r14872 if m == nullid:
continue
Martin Geisler
progress: use a verb (+noun) in present participle
r10698 ui.progress(_('crosschecking'), count, total=total)
Matt Mackall
verify: improve handling of empty or missing files...
r5541 if f not in filenodes:
lr = filelinkrevs[f][0]
err(lr, _("in changeset but not in manifest"), f)
Matt Mackall
Move repo.verify
r2778
Matt Mackall
verify: lots of refactoring...
r6752 if havecl:
Matt Mackall
replace util.sort with sorted built-in...
r8209 for f in sorted(filenodes):
Augie Fackler
verify: call ui.progress()
r10433 count += 1
Martin Geisler
progress: use a verb (+noun) in present participle
r10698 ui.progress(_('crosschecking'), count, total=total)
Matt Mackall
verify: lots of refactoring...
r6752 if f not in filelinkrevs:
try:
Matt Mackall
linkrev: take a revision number rather than a hash
r7361 fl = repo.file(f)
lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
Matt Mackall
verify: lots of refactoring...
r6752 except:
lr = None
err(lr, _("in manifest but not in changeset"), f)
Martin Geisler
progress: use a verb (+noun) in present participle
r10698 ui.progress(_('crosschecking'), None)
Augie Fackler
verify: call ui.progress()
r10433
Matt Mackall
verify: add some local variables
r6751 ui.status(_("checking files\n"))
Adrian Buehlmann
verify: check repo.store
r6892
Benoit Boissinot
verify: use set instead of dict
r8466 storefiles = set()
Matt Mackall
store: change handling of decoding errors
r6900 for f, f2, size in repo.store.datafiles():
if not f:
err(None, _("cannot decode filename '%s'") % f2)
Thomas Arendsen Hein
verify: fix "missing revlog!" errors for revlog format v0 and add test...
r12170 elif size > 0 or not revlogv1:
Benoit Boissinot
verify: use set instead of dict
r8466 storefiles.add(f)
Adrian Buehlmann
verify: check repo.store
r6892
Matt Mackall
replace util.sort with sorted built-in...
r8209 files = sorted(set(filenodes) | set(filelinkrevs))
Augie Fackler
verify: call ui.progress()
r10433 total = len(files)
for i, f in enumerate(files):
Martin Geisler
progress: use a verb (+noun) in present participle
r10698 ui.progress(_('checking'), i, item=f, total=total)
Henrik Stuart
verify: avoid exception on missing file revlog...
r8291 try:
linkrevs = filelinkrevs[f]
except KeyError:
# in manifest but not in changelog
linkrevs = []
if linkrevs:
lr = linkrevs[0]
else:
lr = None
Benoit Boissinot
verify: do not abort on fully corrupted revlog
r7832 try:
fl = repo.file(f)
except error.RevlogError, e:
Benoit Boissinot
verify: find correct first corrupted cset for missing/corrupted revlogs
r7833 err(lr, _("broken revlog! (%s)") % e, f)
Benoit Boissinot
verify: do not abort on fully corrupted revlog
r7832 continue
Adrian Buehlmann
verify: check repo.store
r6892
Matt Mackall
store: change handling of decoding errors
r6900 for ff in fl.files():
Adrian Buehlmann
verify: check repo.store
r6892 try:
Benoit Boissinot
verify: use set instead of dict
r8466 storefiles.remove(ff)
Adrian Buehlmann
verify: check repo.store
r6892 except KeyError:
Benoit Boissinot
verify: find correct first corrupted cset for missing/corrupted revlogs
r7833 err(lr, _("missing revlog!"), ff)
Adrian Buehlmann
verify: check repo.store
r6892
Benoit Boissinot
verify: reference the correct linkrev when a filelog is missing...
r8292 checklog(fl, f, lr)
Matt Mackall
verify: report first bad changeset...
r5313 seen = {}
Matt Mackall
verify: initialize rp variable in case we hit out of memory
r11756 rp = None
Matt Mackall
add __len__ and __iter__ methods to repo and revlog
r6750 for i in fl:
Matt Mackall
Move repo.verify
r2778 revisions += 1
n = fl.node(i)
Henrik Stuart
verify: avoid exception on missing file revlog...
r8291 lr = checkentry(fl, i, n, seen, linkrevs, f)
Matt Mackall
verify: improve handling of empty or missing files...
r5541 if f in filenodes:
if havemf and n not in filenodes[f]:
Matt Mackall
verify: lots of refactoring...
r6752 err(lr, _("%s not in manifests") % (short(n)), f)
Matt Mackall
verify: improve handling of empty or missing files...
r5541 else:
del filenodes[f][n]
Matt Mackall
Move repo.verify
r2778
# verify contents
try:
Matt Mackall
verify: reduce memory footprint when unpacking files...
r11753 l = len(fl.read(n))
Matt Mackall
verify: lots of refactoring...
r6752 rp = fl.renamed(n)
Matt Mackall
verify: reduce memory footprint when unpacking files...
r11753 if l != fl.size(i):
Matt Mackall
verify: don't trip over binary files starting with 01 0a
r7675 if len(fl.revision(n)) != fl.size(i):
Matt Mackall
verify: lots of refactoring...
r6752 err(lr, _("unpacked size is %s, %s expected") %
Matt Mackall
verify: reduce memory footprint when unpacking files...
r11753 (l, fl.size(i)), f)
Matt Mackall
Move repo.verify
r2778 except Exception, inst:
Matt Mackall
verify: lots of refactoring...
r6752 exc(lr, _("unpacking %s") % short(n), inst, f)
Matt Mackall
Move repo.verify
r2778
Matt Mackall
verify: add rename link checking
r3744 # check renames
try:
if rp:
Patrick Mezard
verify: detect file copy sources not in parents with --verbose
r9545 if lr is not None and ui.verbose:
ctx = lrugetctx(lr)
found = False
for pctx in ctx.parents():
if rp[0] in pctx:
found = True
break
if not found:
warn(_("warning: copy source of '%s' not"
" in parents of %s") % (f, ctx))
Matt Mackall
verify: add rename link checking
r3744 fl2 = repo.file(rp[0])
Matt Mackall
add __len__ and __iter__ methods to repo and revlog
r6750 if not len(fl2):
Matt Mackall
verify: lots of refactoring...
r6752 err(lr, _("empty or missing copy source revlog %s:%s")
Patrick Mezard
verify: check copy source revlog and nodeid
r6534 % (rp[0], short(rp[1])), f)
elif rp[1] == nullid:
Matt Mackall
verify: demote warning about nullid in copy to note
r8993 ui.note(_("warning: %s@%s: copy source"
" revision is nullid %s:%s\n")
Benoit Boissinot
c0bd7d8b69ef uses err() instead of warn() but prototype doesn't match...
r7004 % (f, lr, rp[0], short(rp[1])))
Patrick Mezard
verify: check copy source revlog and nodeid
r6534 else:
Peter Arrenbrecht
cleanup: drop variables for unused return values...
r7874 fl2.rev(rp[1])
Matt Mackall
verify: add rename link checking
r3744 except Exception, inst:
Matt Mackall
verify: lots of refactoring...
r6752 exc(lr, _("checking rename of %s") % short(n), inst, f)
Matt Mackall
verify: add rename link checking
r3744
Matt Mackall
Move repo.verify
r2778 # cross-check
Matt Mackall
verify: improve handling of empty or missing files...
r5541 if f in filenodes:
Matt Mackall
many, many trivial check-code fixups
r10282 fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
Matt Mackall
replace util.sort with sorted built-in...
r8209 for lr, node in sorted(fns):
Matt Mackall
verify: improve handling of empty or missing files...
r5541 err(lr, _("%s in manifests not found") % short(node), f)
Martin Geisler
progress: use a verb (+noun) in present participle
r10698 ui.progress(_('checking'), None)
Matt Mackall
Move repo.verify
r2778
Adrian Buehlmann
verify: check repo.store
r6892 for f in storefiles:
warn(_("warning: orphan revlog '%s'") % f)
Matt Mackall
verify: add some local variables
r6751 ui.status(_("%d files, %d changesets, %d total revisions\n") %
Matt Mackall
verify: lots of refactoring...
r6752 (len(files), len(cl), revisions))
Matt Mackall
Move repo.verify
r2778 if warnings[0]:
Matt Mackall
verify: add some local variables
r6751 ui.warn(_("%d warnings encountered!\n") % warnings[0])
Matt Mackall
Move repo.verify
r2778 if errors[0]:
Matt Mackall
verify: add some local variables
r6751 ui.warn(_("%d integrity errors encountered!\n") % errors[0])
Matt Mackall
verify: lots of refactoring...
r6752 if badrevs:
Matt Mackall
verify: add some local variables
r6751 ui.warn(_("(first damaged changeset appears to be %d)\n")
Matt Mackall
verify: lots of refactoring...
r6752 % min(badrevs))
Matt Mackall
Move repo.verify
r2778 return 1