# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

Gregory Szorc
|
r25991 | from __future__ import absolute_import | ||
Bryan O'Sullivan
|
r17860 | import os | ||
Gregory Szorc
|
r25991 | |||
from .i18n import _ | ||||
from .node import ( | ||||
nullid, | ||||
short, | ||||
) | ||||
from . import ( | ||||
error, | ||||
revlog, | ||||
util, | ||||
) | ||||
Matt Mackall
|
r2778 | |||
def verify(repo):
    """Check the integrity of ``repo`` while holding its lock.

    Returns whatever ``verifier.verify()`` returns for this repository.
    """
    lock = repo.lock()
    with lock:
        checker = verifier(repo)
        return checker.verify()
Matt Mackall
|
r4915 | |||
Bryan O'Sullivan
|
r17860 | def _normpath(f): | ||
# under hg < 2.4, convert didn't sanitize paths properly, so a | ||||
# converted repo may contain repeated slashes | ||||
while '//' in f: | ||||
f = f.replace('//', '/') | ||||
return f | ||||
Augie Fackler
|
r26900 | def _validpath(repo, path): | ||
"""Returns False if a path should NOT be treated as part of a repo. | ||||
For all in-core cases, this returns True, as we have no way for a | ||||
path to be mentioned in the history but not actually be | ||||
relevant. For narrow clones, this is important because many | ||||
filelogs will be missing, and changelog entries may mention | ||||
modified files that are outside the narrow scope. | ||||
""" | ||||
return True | ||||
Durham Goode
|
class verifier(object):
    """Integrity checker for a local repository.

    A pass (see ``verify()``) walks the changelog, the manifests (including
    directory manifests), cross-checks the files named by both, and finally
    walks every filelog. Problems are written through ``ui.warn`` and counted
    in ``errors`` / ``warnings``.
    """

    def __init__(self, repo):
        # checks are run on the unfiltered repository object
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        # link revisions that reported errors were attributed to
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        # False only when the changelog uses revlog format 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        # set while reading the changelog: a changeset named a non-null
        # manifest, or could not be unpacked at all
        self.refersmf = False
        # set when a filelog's backing file is absent from the store listing
        self.fncachewarned = False

    def warn(self, msg):
        """Emit ``msg`` as a warning line and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        ``linkrev`` is recorded in ``badrevs`` unless it is None, in which
        case '?' is printed in its place. ``filename``, when given, is
        prefixed to the message as ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an error, prefixed with ``msg``."""
        if not str(inst):
            # exceptions without a message: fall back to their repr
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def checklog(self, obj, name, linkrev):
        """Run revlog-level sanity checks on ``obj``, reported as ``name``.

        Reports an empty/missing revlog (only when the repository has a
        changelog or manifest), size mismatches returned by
        ``obj.checksize()``, and warns when the revlog format differs from
        the changelog's.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of revlog ``obj``.

        Verifies the link revision against ``linkrevs``, that both parents
        were already seen (or are null), and that ``node`` is not a
        duplicate. ``seen`` maps node -> revision and is updated in place;
        ``f`` is the name used in messages.

        Returns the link revision, or None when it could not be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered candidates
                        pass
                self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run the full verification pass.

        Raises ``error.Abort`` when the repository URL does not start with
        'file:'. Returns 1 when integrity errors were found, None otherwise.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changelog entry.

        Returns ``(mflinkrevs, filelinkrevs)``: the first maps each non-null
        manifest node to the changelog revisions naming it, the second maps
        each (normalized) file path to the changelog revisions listing it.
        """
        ui = self.ui
        repo = self.repo
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may well have named a manifest
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        progress=None):
        """Check the manifest revlog for ``dir`` ("" is the root manifest).

        ``mflinkrevs`` maps manifest node -> list of revisions referring to
        it; entries are deleted as the matching manifest revisions are found.
        ``storefiles`` and ``progress`` are supplied by this method itself
        when it recurses into directory manifests: the former is the set of
        'meta/' store files not yet accounted for, the latter a callback
        advancing the progress bar.

        Returns a dict mapping file path -> {filenode: linkrev}, merged over
        this manifest and all directory manifests below it.
        """
        repo = self.repo
        ui = self.ui
        mfl = self.repo.manifestlog
        mf = mfl._revlog.dirlog(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            # storefiles defaults to None; only subtract when a set was given
            if storefiles is not None:
                storefiles.difference_update(mf.files())
            if progress: # should be true since we're in a subdirectory
                progress()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        total = len(mf)
        for i in mf:
            if not dir:
                ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self.err(lr, _("%s not in parent-directory manifest") %
                         short(n), label)
            else:
                self.err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self.err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if not _validpath(repo, fullpath):
                        continue
                    if fl == 't':
                        # flag 't': entry is handled as a directory manifest
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            ui.progress(_('checking'), None)

        if self.havemf:
            # whatever is left in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self.err(c, _("parent-directory manifest refers to unknown "
                                  "revision %s") % short(m), label)
                else:
                    self.err(c, _("changeset refers to unknown revision %s") %
                             short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self.err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdircount = len(subdirs)
            # one-element list so the closure below can update the counter
            currentsubdir = [0]
            def progress():
                currentsubdir[0] += 1
                ui.progress(_('checking'), currentsubdir[0], total=subdircount,
                            unit=_('manifests'))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   progress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            ui.progress(_('checking'), None)
            for f in sorted(storefiles):
                self.warn(_("warning: orphan revlog '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files named by changesets but absent from the manifests,
        and files named by manifests but absent from the changesets.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        count = 0
        if self.havemf:
            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # filelog unusable: report without a link revision
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog named by ``filenodes`` or ``filelinkrevs``.

        ``filenodes`` maps path -> {filenode: linkrev} and is consumed:
        nodes found in a filelog are removed, leftovers are reported.
        ``filelinkrevs`` maps path -> list of changelog revisions.

        Returns ``(number of files, number of file revisions visited)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total, unit=_('files'))
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self.checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # Reset per revision: if reading this revision fails below,
                # the rename check must not reuse the copy source of an
                # earlier revision.
                rp = None

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    expected = fl.size(rev)
                    if l != expected and len(fl.revision(n)) != expected:
                        self.err(lr,
                                 _("unpacked size is %s, %s expected") %
                                 (l, expected), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx
                                       for pctx in ctx.parents()):
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s") %
                                     (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                # nodes still listed were named by a manifest but never
                # found in the filelog
                fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("manifest refers to unknown revision %s") %
                             short(node), f)
        ui.progress(_('checking'), None)

        for f in sorted(storefiles):
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions