##// END OF EJS Templates
verify: move filelog verification to its own function...
verify: move filelog verification to its own function This makes verify more modular so extensions can hook in more easily.

File last commit:

r27644:331e5c28 default
r27644:331e5c28 default
Show More
verify.py
373 lines | 13.6 KiB | text/x-python | PythonLexer
Matt Mackall
Move repo.verify
r2778 # verify.py - repository integrity checking for Mercurial
#
Thomas Arendsen Hein
Updated copyright notices and add "and others" to "hg version"
r4635 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
Matt Mackall
Move repo.verify
r2778 #
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Move repo.verify
r2778
Gregory Szorc
verify: use absolute_import
r25991 from __future__ import absolute_import
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
r17860 import os
Gregory Szorc
verify: use absolute_import
r25991
from .i18n import _
from .node import (
nullid,
short,
)
from . import (
error,
revlog,
util,
)
Matt Mackall
Move repo.verify
r2778
def verify(repo):
    """Check the integrity of ``repo`` while holding its lock.

    The repository lock is taken before a ``verifier`` is built and is
    always released afterwards, including when verification raises.

    Returns whatever ``verifier.verify()`` returns.
    """
    repolock = repo.lock()
    try:
        result = verifier(repo).verify()
    finally:
        # Release explicitly rather than relying on garbage collection.
        repolock.release()
    return result
Matt Mackall
Use try/finally pattern to cleanup locks and transactions
r4915
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
r17860 def _normpath(f):
# under hg < 2.4, convert didn't sanitize paths properly, so a
# converted repo may contain repeated slashes
while '//' in f:
f = f.replace('//', '/')
return f
Augie Fackler
verify: add a hook that can let extensions manipulate file lists...
r26900 def _validpath(repo, path):
"""Returns False if a path should NOT be treated as part of a repo.
For all in-core cases, this returns True, as we have no way for a
path to be mentioned in the history but not actually be
relevant. For narrow clones, this is important because many
filelogs will be missing, and changelog entries may mention
modified files that are outside the narrow scope.
"""
return True
Durham Goode
verify: move verify logic into a class...
class verifier(object):
    """Integrity checker for the changelog, manifest and filelogs of a repo.

    The work is split into small methods (``warn``, ``err``, ``exc``,
    ``checklog``, ``checkentry``, ``_verifyfiles``) so that extensions can
    hook into individual steps. State shared between the steps lives on the
    instance:

    - ``badrevs``: set of linkrevs that errors were reported against
    - ``errors`` / ``warnings``: running counters
    - ``havecl`` / ``havemf``: whether changelog / manifest are non-empty
    - ``revlogv1``: True when the changelog is not in revlog format 0
    - ``refersmf``: set once a changeset references a non-null manifest
      (or could not be unpacked)
    - ``fncachewarned``: set once a revlog missing from the store file
      list has been reported
    """

    def __init__(self, repo):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        self.refersmf = False
        self.fncachewarned = False

    def warn(self, msg):
        """Emit ``msg`` as a warning line and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        ``linkrev`` is the changeset revision the problem is attributed
        to; it is remembered in ``badrevs``. When it is None the message
        shows '?' instead and nothing is added to ``badrevs``. A truthy
        ``filename`` is prepended as ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an integrity error.

        Exceptions whose string form is empty are shown via ``repr`` so
        the message still identifies them.
        """
        if not str(inst):
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def checklog(self, obj, name, linkrev):
        """Run whole-revlog sanity checks on ``obj``.

        Reports an error when the revlog is empty although the repository
        has a changelog or manifest, reports the size mismatches returned
        by ``obj.checksize()``, and warns when the revlog format differs
        from the one used by the changelog.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check linkrev, parents and uniqueness of one revlog entry.

        ``obj`` is the revlog, ``i``/``node`` identify the entry,
        ``seen`` maps already visited nodes to their revision (and is
        updated here), ``linkrevs`` lists the changeset revisions expected
        to introduce the entry and ``f`` is the name used in messages.

        Returns the entry's linkrev, or None when the stored linkrev was
        found to be wrong and therefore cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: fall back to the unfiltered
                        # list of candidates for the hint below
                        pass
                self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Verify changesets, manifests and files, printing a summary.

        Raises ``error.Abort`` when the repository URL does not start
        with 'file:'. Returns 1 when integrity errors were found and
        None otherwise.
        """
        repo = self.repo
        # manifest node -> changeset revs referring to it
        mflinkrevs = {}
        # normalized file path -> changeset revs touching it
        filelinkrevs = {}
        # normalized file path -> {file node: manifest linkrev}
        filenodes = {}
        badrevs = self.badrevs
        ui = repo.ui
        cl = repo.changelog
        mf = repo.manifest

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        revlogv1 = self.revlogv1
        if ui.verbose or not revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if revlogv1 else 0))

        havecl = self.havecl
        havemf = self.havemf

        ui.status(_("checking changesets\n"))
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # the changeset could not be read, so it may well refer
                # to a manifest: keep the manifest check enabled
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)

        ui.status(_("checking manifests\n"))
        seen = {}
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, "manifest", 0)
        total = len(mf)
        for i in mf:
            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []),
                                 "manifest")
            if n in mflinkrevs:
                # whatever is left in mflinkrevs afterwards is a manifest
                # referenced by a changeset but absent from the manifest log
                del mflinkrevs[n]
            else:
                self.err(lr, _("%s not in changesets") % short(n), "manifest")

            try:
                for f, fn in mf.readdelta(n).iteritems():
                    if not f:
                        self.err(lr, _("file without name in manifest"))
                    elif f != "/dev/null": # ignore this in very old repos
                        if _validpath(repo, f):
                            filenodes.setdefault(
                                _normpath(f), {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading manifest delta %s") % short(n), inst)
        ui.progress(_('checking'), None)

        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
        count = 0
        if havemf:
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                count += 1
                if m == nullid:
                    continue
                ui.progress(_('crosschecking'), count, total=total)
                self.err(c, _("changeset refers to unknown manifest %s") %
                         short(m))
            mflinkrevs = None # del is bad here due to scope issues

            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # the filelog is unusable; report without a linkrev
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(cl), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(badrevs))
            return 1

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Verify every filelog named in ``filenodes`` or ``filelinkrevs``.

        ``filenodes`` maps a file path to ``{file node: manifest linkrev}``
        and ``filelinkrevs`` maps a file path to the changeset revisions
        touching it. Nodes found in a filelog are removed from
        ``filenodes`` so that the leftovers can be reported as missing.

        Returns a ``(number of files, number of file revisions)`` tuple.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        # Store files not claimed by any filelog below are orphans.
        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif size > 0 or not revlogv1:
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for pos, f in enumerate(files):
            ui.progress(_('checking'), pos, item=f, total=total)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self.checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # Reset the copy metadata for every revision: if reading
                # this revision fails below, the rename check must not be
                # run against the data of the previous revision.
                rp = None

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(i):
                        if len(fl.revision(n)) != fl.size(i):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(i)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            found = False
                            for pctx in ctx.parents():
                                if rp[0] in pctx:
                                    found = True
                                    break
                            if not found:
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s")
                                     % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                # anything still listed was named by a manifest but never
                # found in the filelog
                fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("%s in manifests not found") % short(node),
                             f)
        ui.progress(_('checking'), None)

        for f in storefiles:
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions