##// END OF EJS Templates
gitweb: link to the correct feeds from help pages
gitweb: link to the correct feeds from help pages

File last commit:

r27453:8462d7f2 default
r27548:435c9068 default
Show More
verify.py
363 lines | 13.4 KiB | text/x-python | PythonLexer
Matt Mackall
Move repo.verify
r2778 # verify.py - repository integrity checking for Mercurial
#
Thomas Arendsen Hein
Updated copyright notices and add "and others" to "hg version"
r4635 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
Matt Mackall
Move repo.verify
r2778 #
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Move repo.verify
r2778
Gregory Szorc
verify: use absolute_import
r25991 from __future__ import absolute_import
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
r17860 import os
Gregory Szorc
verify: use absolute_import
r25991
from .i18n import _
from .node import (
nullid,
short,
)
from . import (
error,
revlog,
util,
)
Matt Mackall
Move repo.verify
r2778
def verify(repo):
    """Verify ``repo`` while holding its lock.

    Takes the lock returned by ``repo.lock()``, runs
    ``verifier(repo).verify()`` and returns whatever that call returns.
    The lock is released in a ``finally`` clause, so it is dropped even
    when verification raises.
    """
    lock = repo.lock()
    try:
        return verifier(repo).verify()
    finally:
        lock.release()
Matt Mackall
Use try/finally pattern to cleanup locks and transactions
r4915
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
r17860 def _normpath(f):
# under hg < 2.4, convert didn't sanitize paths properly, so a
# converted repo may contain repeated slashes
while '//' in f:
f = f.replace('//', '/')
return f
Augie Fackler
verify: add a hook that can let extensions manipulate file lists...
r26900 def _validpath(repo, path):
"""Returns False if a path should NOT be treated as part of a repo.
For all in-core cases, this returns True, as we have no way for a
path to be mentioned in the history but not actually be
relevant. For narrow clones, this is important because many
filelogs will be missing, and changelog entries may mention
modified files that are outside the narrow scope.
"""
return True
Durham Goode
verify: move verify logic into a class...
class verifier(object):
    """Repository integrity checker.

    Holds the state shared by one verification run: the unfiltered
    repository, its ui, running error/warning counters and the set of
    changeset revisions that errors were attributed to.
    """
    # NOTE(review): block nesting below was rebuilt from a listing whose
    # indentation had been stripped - confirm against the repository copy.

    def __init__(self, repo):
        """Capture ``repo`` and precompute the facts every check needs."""
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        # changeset revisions that at least one error was attributed to
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        # verify() walks the revisions of the *unfiltered* repo, so the
        # cached context lookup has to resolve against that same view.
        self.lrugetctx = util.lrucachefunc(self.repo.changectx)
        # set once a changeset names a non-null manifest (or is unreadable)
        self.refersmf = False
        # set once a revlog file turns out to be missing from the store list
        self.fncachewarned = False

    def warn(self, msg):
        """Emit ``msg`` plus a newline through the ui and count a warning."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        ``linkrev`` is recorded in ``badrevs`` unless it is None, in which
        case '?' is printed in its place. When ``filename`` is truthy the
        message is prefixed with ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an error via ``err``.

        Falls back to ``repr(inst)`` when ``str(inst)`` is empty so the
        report always names the exception.
        """
        if not str(inst):
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def verify(self):
        """Run every check and print a summary.

        The phases run in order: changesets, manifests, cross-check of
        files named by changesets versus manifests, then the individual
        file revlogs.

        Returns 1 when at least one error was recorded; otherwise falls
        off the end and returns None.

        Raises error.Abort when ``repo.url()`` does not start with
        'file:'.
        """
        repo = self.repo
        mflinkrevs = {}    # manifest node -> changeset revs naming it
        filelinkrevs = {}  # file path -> changeset revs listing it
        filenodes = {}     # file path -> {file node: linkrev}
        revisions = 0
        badrevs = self.badrevs
        ui = repo.ui
        cl = repo.changelog
        mf = repo.manifest
        lrugetctx = self.lrugetctx

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        # The two helpers below close over havecl, havemf and revlogv1,
        # which are bound further down but before either helper is called.

        def checklog(obj, name, linkrev):
            """Check the size and format version of revlog ``obj``."""
            if not len(obj) and (havecl or havemf):
                self.err(linkrev, _("empty or missing %s") % name)
                return

            d = obj.checksize()
            if d[0]:
                self.err(None, _("data length off by %d bytes") % d[0], name)
            if d[1]:
                self.err(None, _("index contains %d extra bytes") % d[1], name)

            # warn when this revlog's format differs from the changelog's
            if obj.version != revlog.REVLOGV0:
                if not revlogv1:
                    self.warn(_("warning: `%s' uses revlog format 1") % name)
            elif revlogv1:
                self.warn(_("warning: `%s' uses revlog format 0") % name)

        def checkentry(obj, i, node, seen, linkrevs, f):
            """Check linkrev, parents and uniqueness of one revlog entry.

            Records ``node`` in ``seen`` and returns the entry's linkrev,
            or None when that linkrev was found to be wrong.
            """
            lr = obj.linkrev(obj.rev(node))
            if lr < 0 or (havecl and lr not in linkrevs):
                if lr < 0 or lr >= len(cl):
                    msg = _("rev %d points to nonexistent changeset %d")
                else:
                    msg = _("rev %d points to unexpected changeset %d")
                self.err(None, msg % (i, lr), f)
                if linkrevs:
                    if f and len(linkrevs) > 1:
                        try:
                            # attempt to filter down to real linkrevs
                            linkrevs = [l for l in linkrevs
                                        if lrugetctx(l)[f].filenode() == node]
                        except Exception:
                            # best effort only: on any failure keep the
                            # unfiltered candidate list for the hint below
                            pass
                    self.warn(_(" (expected %s)") %
                              " ".join(map(str, linkrevs)))
                lr = None # can't be trusted

            try:
                p1, p2 = obj.parents(node)
                if p1 not in seen and p1 != nullid:
                    self.err(lr, _("unknown parent 1 %s of %s") %
                             (short(p1), short(node)), f)
                if p2 not in seen and p2 != nullid:
                    self.err(lr, _("unknown parent 2 %s of %s") %
                             (short(p2), short(node)), f)
            except Exception as inst:
                self.exc(lr, _("checking parents of %s") % short(node), inst, f)

            if node in seen:
                self.err(lr, _("duplicate revision %d (%d)") %
                         (i, seen[node]), f)
            seen[node] = i
            return lr

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        revlogv1 = self.revlogv1
        if ui.verbose or not revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if revlogv1 else 0))

        havecl = self.havecl
        havemf = self.havemf

        # --- phase 1: changesets ---
        ui.status(_("checking changesets\n"))
        seen = {}
        checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset might have named a manifest, so
                # the manifest log still has to be checked
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)

        # --- phase 2: manifests ---
        ui.status(_("checking manifests\n"))
        seen = {}
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            checklog(mf, "manifest", 0)
        total = len(mf)
        for i in mf:
            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
            if n in mflinkrevs:
                # accounted for; whatever remains afterwards is unknown
                del mflinkrevs[n]
            else:
                self.err(lr, _("%s not in changesets") % short(n), "manifest")

            try:
                for f, fn in mf.readdelta(n).iteritems():
                    if not f:
                        self.err(lr, _("file without name in manifest"))
                    elif f != "/dev/null": # ignore this in very old repos
                        if _validpath(repo, f):
                            filenodes.setdefault(
                                _normpath(f), {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading manifest delta %s") % short(n), inst)
        ui.progress(_('checking'), None)

        # --- phase 3: cross-check changesets against manifests ---
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
        count = 0
        if havemf:
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                count += 1
                if m == nullid:
                    continue
                ui.progress(_('crosschecking'), count, total=total)
                self.err(c, _("changeset refers to unknown manifest %s") %
                         short(m))
            mflinkrevs = None # del is bad here due to scope issues

            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # cannot attribute the error to a changeset
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

        # --- phase 4: file revlogs ---
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif size > 0 or not revlogv1:
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            # tick off the store files backing this revlog; anything left
            # in storefiles at the end is reported as an orphan
            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            checklog(fl, f, lr)
            seen = {}
            rp = None
            # NOTE: this loop rebinds ``i``; the enclosing loop only reads
            # ``i`` at the top of each iteration, after enumerate resets it
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(i):
                        # only an error if the full revision text also
                        # disagrees with the recorded size
                        if len(fl.revision(n)) != fl.size(i):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(i)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            found = False
                            for pctx in ctx.parents():
                                if rp[0] in pctx:
                                    found = True
                                    break
                            if not found:
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s")
                                     % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # called for its side effect: the lookup fails
                            # if the copy source revision does not exist
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                # nodes still listed were named by a manifest but never
                # seen in the filelog
                fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("%s in manifests not found") % short(node),
                             f)
        ui.progress(_('checking'), None)

        for f in storefiles:
            self.warn(_("warning: orphan revlog '%s'") % f)

        # --- summary ---
        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (len(files), len(cl), revisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(badrevs))
            return 1