verify.py
338 lines
| 11.2 KiB
| text/x-python
|
PythonLexer
/ mercurial / verify.py
# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Matt Mackall
|
r2778 | |||
Gregory Szorc
|
r25991 | from __future__ import absolute_import | ||
Bryan O'Sullivan
|
r17860 | import os | ||
Gregory Szorc
|
r25991 | |||
from .i18n import _ | ||||
from .node import ( | ||||
nullid, | ||||
short, | ||||
) | ||||
from . import ( | ||||
error, | ||||
revlog, | ||||
util, | ||||
) | ||||
Matt Mackall
|
r2778 | |||
def verify(repo):
    """Run the integrity check on ``repo`` while holding its lock.

    The lock returned by ``repo.lock()`` is released even if the check
    raises.  The return value is whatever ``_verify`` returns.
    """
    lock = repo.lock()
    try:
        return _verify(repo)
    finally:
        lock.release()
Matt Mackall
|
r4915 | |||
Bryan O'Sullivan
|
r17860 | def _normpath(f): | ||
# under hg < 2.4, convert didn't sanitize paths properly, so a | ||||
# converted repo may contain repeated slashes | ||||
while '//' in f: | ||||
f = f.replace('//', '/') | ||||
return f | ||||
Matt Mackall
|
def _verify(repo):
    """Check changelog, manifest and file revlogs of ``repo`` for consistency.

    Works on the unfiltered repository and reports everything through
    ``repo.ui``.  The phases run in this order: changesets, manifests, the
    changeset/manifest cross-check, then every file revlog.

    Raises ``util.Abort`` when ``repo.url()`` does not start with ``file:``.

    Returns 1 when at least one integrity error was recorded; otherwise the
    function falls off the end and returns None.
    """
    repo = repo.unfiltered()
    # manifest node -> changeset revs whose changelog entry names that node
    mflinkrevs = {}
    # normalized filename -> changeset revs that list the file
    filelinkrevs = {}
    # normalized filename -> {file node: linkrev of the manifest entry that
    # first mentioned it}
    filenodes = {}
    revisions = 0
    badrevs = set()
    # one-element lists so the nested helpers below can bump the counters
    errors = [0]
    warnings = [0]
    ui = repo.ui
    cl = repo.changelog
    mf = repo.manifest
    lrugetctx = util.lrucachefunc(repo.changectx)

    if not repo.url().startswith('file:'):
        raise util.Abort(_("cannot verify bundle or remote repos"))

    def err(linkrev, msg, filename=None):
        """Report one integrity error and count it.

        A linkrev that is not None is remembered in ``badrevs``; None is
        printed as '?'.
        """
        if linkrev is not None:
            badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        ui.warn(" " + msg + "\n")
        errors[0] += 1

    def exc(linkrev, msg, inst, filename=None):
        """Report the caught exception ``inst`` as an integrity error."""
        if isinstance(inst, KeyboardInterrupt):
            ui.warn(_("interrupted"))
            # bare raise: every call site is inside an except handler
            raise
        if not str(inst):
            # exceptions with an empty message are shown by their repr
            inst = repr(inst)
        err(linkrev, "%s: %s" % (msg, inst), filename)

    def warn(msg):
        """Print a warning and count it."""
        ui.warn(msg + "\n")
        warnings[0] += 1

    def checklog(obj, name, linkrev):
        """Check one revlog as a whole: presence, sizes, format version."""
        if not len(obj) and (havecl or havemf):
            err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            err(None, _("index contains %d extra bytes") % d[1], name)

        # warn when this revlog's format differs from the changelog's
        if obj.version != revlog.REVLOGV0:
            if not revlogv1:
                warn(_("warning: `%s' uses revlog format 1") % name)
        elif revlogv1:
            warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of revlog ``obj``.

        Verifies the linkrev against ``linkrevs``, that both parents were
        already seen, and that ``node`` is not a duplicate; then records
        ``node`` in ``seen``.  Returns the linkrev, or None when it failed
        the check and cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(cl):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                err(lr, _("unknown parent 1 %s of %s") %
                    (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                err(lr, _("unknown parent 2 %s of %s") %
                    (short(p2), short(node)), f)
        except Exception as inst:
            exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    if os.path.exists(repo.sjoin("journal")):
        ui.warn(_("abandoned transaction found - run hg recover\n"))

    revlogv1 = cl.version != revlog.REVLOGV0
    if ui.verbose or not revlogv1:
        ui.status(_("repository uses revlog format %d\n") %
                  (revlogv1 and 1 or 0))

    havecl = len(cl) > 0
    havemf = len(mf) > 0

    # Phase 1: changelog.  Collect which manifest node and which files
    # each changeset refers to.
    ui.status(_("checking changesets\n"))
    refersmf = False
    seen = {}
    checklog(cl, "changelog", 0)
    total = len(repo)
    for i in repo:
        ui.progress(_('checking'), i, total=total, unit=_('changesets'))
        n = cl.node(i)
        checkentry(cl, i, n, seen, [i], "changelog")

        try:
            changes = cl.read(n)
            if changes[0] != nullid:
                mflinkrevs.setdefault(changes[0], []).append(i)
                refersmf = True
            for f in changes[3]:
                filelinkrevs.setdefault(_normpath(f), []).append(i)
        except Exception as inst:
            # an unreadable changeset still makes the manifest get checked
            refersmf = True
            exc(i, _("unpacking changeset %s") % short(n), inst)
    ui.progress(_('checking'), None)

    # Phase 2: manifest.  Every manifest node must be referenced by a
    # changeset; collect the file nodes each manifest delta mentions.
    ui.status(_("checking manifests\n"))
    seen = {}
    if refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        checklog(mf, "manifest", 0)
    total = len(mf)
    for i in mf:
        ui.progress(_('checking'), i, total=total, unit=_('manifests'))
        n = mf.node(i)
        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
        if n in mflinkrevs:
            # what remains in mflinkrevs afterwards was never found
            del mflinkrevs[n]
        else:
            err(lr, _("%s not in changesets") % short(n), "manifest")

        try:
            for f, fn in mf.readdelta(n).iteritems():
                if not f:
                    err(lr, _("file without name in manifest"))
                elif f != "/dev/null": # ignore this in very old repos
                    filenodes.setdefault(_normpath(f), {}).setdefault(fn, lr)
        except Exception as inst:
            exc(lr, _("reading manifest delta %s") % short(n), inst)
    ui.progress(_('checking'), None)

    # Phase 3: cross-check what the changesets claim against what the
    # manifests contain.
    ui.status(_("crosschecking files in changesets and manifests\n"))

    total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
    count = 0
    if havemf:
        for c, m in sorted([(c, m) for m in mflinkrevs
                            for c in mflinkrevs[m]]):
            count += 1
            if m == nullid:
                continue
            ui.progress(_('crosschecking'), count, total=total)
            err(c, _("changeset refers to unknown manifest %s") % short(m))
        mflinkrevs = None # del is bad here due to scope issues

        for f in sorted(filelinkrevs):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                err(lr, _("in changeset but not in manifest"), f)

    if havecl:
        for f in sorted(filenodes):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filelinkrevs:
                try:
                    fl = repo.file(f)
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                except Exception:
                    # no usable linkrev; err() will print '?'
                    lr = None
                err(lr, _("in manifest but not in changeset"), f)

    ui.progress(_('crosschecking'), None)

    # Phase 4: individual file revlogs.
    ui.status(_("checking files\n"))

    # Names reported by the store; each one is removed again when the file
    # revlog that owns it is visited, so leftovers are orphans.
    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            err(None, _("cannot decode filename '%s'") % f2)
        elif size > 0 or not revlogv1:
            storefiles.add(_normpath(f))

    fncachewarned = False
    files = sorted(set(filenodes) | set(filelinkrevs))
    total = len(files)
    for i, f in enumerate(files):
        ui.progress(_('checking'), i, item=f, total=total)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.RevlogError as e:
            err(lr, _("broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                warn(_(" warning: revlog '%s' not in fncache!") % ff)
                fncachewarned = True

        checklog(fl, f, lr)
        seen = {}
        # NOTE(review): rp is reset once per file, not once per revision.
        # If reading a revision raises before rp is reassigned, the rename
        # check below runs on the value left by an earlier revision.
        # Confirm whether that is intended.
        rp = None
        for i in fl:
            revisions += 1
            n = fl.node(i)
            lr = checkentry(fl, i, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    err(lr, _("%s not in manifests") % (short(n)), f)
                else:
                    # what remains in filenodes[f] after this loop was
                    # named by a manifest but is absent from the filelog
                    del filenodes[f][n]

            # verify contents
            try:
                l = len(fl.read(n))
                rp = fl.renamed(n)
                if l != fl.size(i):
                    # only an error if the raw revision length is off too
                    if len(fl.revision(n)) != fl.size(i):
                        err(lr, _("unpacked size is %s, %s expected") %
                            (l, fl.size(i)), f)
            except error.CensoredNodeError:
                # experimental config: censor.policy
                if ui.config("censor", "policy", "abort") == "abort":
                    err(lr, _("censored file data"), f)
            except Exception as inst:
                exc(lr, _("unpacking %s") % short(n), inst, f)

            # check renames
            try:
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        found = False
                        for pctx in ctx.parents():
                            if rp[0] in pctx:
                                found = True
                                break
                        if not found:
                            warn(_("warning: copy source of '%s' not"
                                   " in parents of %s") % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        err(lr, _("empty or missing copy source "
                                  "revlog %s:%s")
                            % (rp[0], short(rp[1])), f)
                    elif rp[1] == nullid:
                        ui.note(_("warning: %s@%s: copy source"
                                  " revision is nullid %s:%s\n")
                                % (f, lr, rp[0], short(rp[1])))
                    else:
                        # called for its lookup only; a failure is
                        # reported by the handler below
                        fl2.rev(rp[1])
            except Exception as inst:
                exc(lr, _("checking rename of %s") % short(n), inst, f)

        # cross-check
        if f in filenodes:
            fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
            for lr, node in sorted(fns):
                err(lr, _("%s in manifests not found") % short(node), f)
    ui.progress(_('checking'), None)

    for f in storefiles:
        warn(_("warning: orphan revlog '%s'") % f)

    ui.status(_("%d files, %d changesets, %d total revisions\n") %
              (len(files), len(cl), revisions))
    if warnings[0]:
        ui.warn(_("%d warnings encountered!\n") % warnings[0])
    if fncachewarned:
        ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                  'corrupt fncache\n'))
    if errors[0]:
        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
        if badrevs:
            ui.warn(_("(first damaged changeset appears to be %d)\n")
                    % min(badrevs))
        return 1