##// END OF EJS Templates
narrowmatcher: propagate the rel() method...
narrowmatcher: propagate the rel() method The full path is propagated to the original match object since this is often used directly for printing a file name to the user. This is cleaner than requiring each caller to join the prefix with the file name prior to calling it, and will lead to not having to pass the prefix around separately. It is also consistent with the bad() and abs() methods in terms of the required input. The uipath() method now inherits this path building property. There is no visible change in path style for rel() because it ultimately calls util.pathto(), which returns an os.sep based path. (The previous os.path.join() was violating the documented usage of util.pathto(), that its third parameter be '/' separated.) The doctest needed to be normalized to '/' separators to avoid test differences on Windows, now that a full path is returned for a short filename. The test changes are to drop globs that are no longer necessary when printing an absolute file in a subrepo, as returned from match.uipath(). Previously when os.path.join() was used to add the prefix, the absolute path to a file in a subrepo was printed with a mix of '/' and '\'. The absolute path for a file not in a subrepo, as returned from match.uipath(), is still purely '/' based.

File last commit:

r22933:3a60cd44 default
r23686:164915e8 default
Show More
verify.py
321 lines | 10.9 KiB | text/x-python | PythonLexer
# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from node import nullid, short
from i18n import _
import os
import revlog, util, error
def verify(repo):
lock = repo.lock()
try:
return _verify(repo)
finally:
lock.release()
def _normpath(f):
# under hg < 2.4, convert didn't sanitize paths properly, so a
# converted repo may contain repeated slashes
while '//' in f:
f = f.replace('//', '/')
return f
def _verify(repo):
repo = repo.unfiltered()
mflinkrevs = {}
filelinkrevs = {}
filenodes = {}
revisions = 0
badrevs = set()
errors = [0]
warnings = [0]
ui = repo.ui
cl = repo.changelog
mf = repo.manifest
lrugetctx = util.lrucachefunc(repo.changectx)
if not repo.url().startswith('file:'):
raise util.Abort(_("cannot verify bundle or remote repos"))
def err(linkrev, msg, filename=None):
if linkrev is not None:
badrevs.add(linkrev)
else:
linkrev = '?'
msg = "%s: %s" % (linkrev, msg)
if filename:
msg = "%s@%s" % (filename, msg)
ui.warn(" " + msg + "\n")
errors[0] += 1
def exc(linkrev, msg, inst, filename=None):
if isinstance(inst, KeyboardInterrupt):
ui.warn(_("interrupted"))
raise
if not str(inst):
inst = repr(inst)
err(linkrev, "%s: %s" % (msg, inst), filename)
def warn(msg):
ui.warn(msg + "\n")
warnings[0] += 1
def checklog(obj, name, linkrev):
if not len(obj) and (havecl or havemf):
err(linkrev, _("empty or missing %s") % name)
return
d = obj.checksize()
if d[0]:
err(None, _("data length off by %d bytes") % d[0], name)
if d[1]:
err(None, _("index contains %d extra bytes") % d[1], name)
if obj.version != revlog.REVLOGV0:
if not revlogv1:
warn(_("warning: `%s' uses revlog format 1") % name)
elif revlogv1:
warn(_("warning: `%s' uses revlog format 0") % name)
def checkentry(obj, i, node, seen, linkrevs, f):
lr = obj.linkrev(obj.rev(node))
if lr < 0 or (havecl and lr not in linkrevs):
if lr < 0 or lr >= len(cl):
msg = _("rev %d points to nonexistent changeset %d")
else:
msg = _("rev %d points to unexpected changeset %d")
err(None, msg % (i, lr), f)
if linkrevs:
if f and len(linkrevs) > 1:
try:
# attempt to filter down to real linkrevs
linkrevs = [l for l in linkrevs
if lrugetctx(l)[f].filenode() == node]
except Exception:
pass
warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
lr = None # can't be trusted
try:
p1, p2 = obj.parents(node)
if p1 not in seen and p1 != nullid:
err(lr, _("unknown parent 1 %s of %s") %
(short(p1), short(node)), f)
if p2 not in seen and p2 != nullid:
err(lr, _("unknown parent 2 %s of %s") %
(short(p2), short(node)), f)
except Exception, inst:
exc(lr, _("checking parents of %s") % short(node), inst, f)
if node in seen:
err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
seen[node] = i
return lr
if os.path.exists(repo.sjoin("journal")):
ui.warn(_("abandoned transaction found - run hg recover\n"))
revlogv1 = cl.version != revlog.REVLOGV0
if ui.verbose or not revlogv1:
ui.status(_("repository uses revlog format %d\n") %
(revlogv1 and 1 or 0))
havecl = len(cl) > 0
havemf = len(mf) > 0
ui.status(_("checking changesets\n"))
refersmf = False
seen = {}
checklog(cl, "changelog", 0)
total = len(repo)
for i in repo:
ui.progress(_('checking'), i, total=total, unit=_('changesets'))
n = cl.node(i)
checkentry(cl, i, n, seen, [i], "changelog")
try:
changes = cl.read(n)
if changes[0] != nullid:
mflinkrevs.setdefault(changes[0], []).append(i)
refersmf = True
for f in changes[3]:
filelinkrevs.setdefault(_normpath(f), []).append(i)
except Exception, inst:
refersmf = True
exc(i, _("unpacking changeset %s") % short(n), inst)
ui.progress(_('checking'), None)
ui.status(_("checking manifests\n"))
seen = {}
if refersmf:
# Do not check manifest if there are only changelog entries with
# null manifests.
checklog(mf, "manifest", 0)
total = len(mf)
for i in mf:
ui.progress(_('checking'), i, total=total, unit=_('manifests'))
n = mf.node(i)
lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
if n in mflinkrevs:
del mflinkrevs[n]
else:
err(lr, _("%s not in changesets") % short(n), "manifest")
try:
for f, fn in mf.readdelta(n).iteritems():
if not f:
err(lr, _("file without name in manifest"))
elif f != "/dev/null":
filenodes.setdefault(_normpath(f), {}).setdefault(fn, lr)
except Exception, inst:
exc(lr, _("reading manifest delta %s") % short(n), inst)
ui.progress(_('checking'), None)
ui.status(_("crosschecking files in changesets and manifests\n"))
total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
count = 0
if havemf:
for c, m in sorted([(c, m) for m in mflinkrevs
for c in mflinkrevs[m]]):
count += 1
if m == nullid:
continue
ui.progress(_('crosschecking'), count, total=total)
err(c, _("changeset refers to unknown manifest %s") % short(m))
mflinkrevs = None # del is bad here due to scope issues
for f in sorted(filelinkrevs):
count += 1
ui.progress(_('crosschecking'), count, total=total)
if f not in filenodes:
lr = filelinkrevs[f][0]
err(lr, _("in changeset but not in manifest"), f)
if havecl:
for f in sorted(filenodes):
count += 1
ui.progress(_('crosschecking'), count, total=total)
if f not in filelinkrevs:
try:
fl = repo.file(f)
lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
except Exception:
lr = None
err(lr, _("in manifest but not in changeset"), f)
ui.progress(_('crosschecking'), None)
ui.status(_("checking files\n"))
storefiles = set()
for f, f2, size in repo.store.datafiles():
if not f:
err(None, _("cannot decode filename '%s'") % f2)
elif size > 0 or not revlogv1:
storefiles.add(_normpath(f))
files = sorted(set(filenodes) | set(filelinkrevs))
total = len(files)
for i, f in enumerate(files):
ui.progress(_('checking'), i, item=f, total=total)
try:
linkrevs = filelinkrevs[f]
except KeyError:
# in manifest but not in changelog
linkrevs = []
if linkrevs:
lr = linkrevs[0]
else:
lr = None
try:
fl = repo.file(f)
except error.RevlogError, e:
err(lr, _("broken revlog! (%s)") % e, f)
continue
for ff in fl.files():
try:
storefiles.remove(ff)
except KeyError:
err(lr, _("missing revlog!"), ff)
checklog(fl, f, lr)
seen = {}
rp = None
for i in fl:
revisions += 1
n = fl.node(i)
lr = checkentry(fl, i, n, seen, linkrevs, f)
if f in filenodes:
if havemf and n not in filenodes[f]:
err(lr, _("%s not in manifests") % (short(n)), f)
else:
del filenodes[f][n]
# verify contents
try:
l = len(fl.read(n))
rp = fl.renamed(n)
if l != fl.size(i):
if len(fl.revision(n)) != fl.size(i):
err(lr, _("unpacked size is %s, %s expected") %
(l, fl.size(i)), f)
except error.CensoredNodeError:
if ui.config("censor", "policy", "abort") == "abort":
err(lr, _("censored file data"), f)
except Exception, inst:
exc(lr, _("unpacking %s") % short(n), inst, f)
# check renames
try:
if rp:
if lr is not None and ui.verbose:
ctx = lrugetctx(lr)
found = False
for pctx in ctx.parents():
if rp[0] in pctx:
found = True
break
if not found:
warn(_("warning: copy source of '%s' not"
" in parents of %s") % (f, ctx))
fl2 = repo.file(rp[0])
if not len(fl2):
err(lr, _("empty or missing copy source revlog %s:%s")
% (rp[0], short(rp[1])), f)
elif rp[1] == nullid:
ui.note(_("warning: %s@%s: copy source"
" revision is nullid %s:%s\n")
% (f, lr, rp[0], short(rp[1])))
else:
fl2.rev(rp[1])
except Exception, inst:
exc(lr, _("checking rename of %s") % short(n), inst, f)
# cross-check
if f in filenodes:
fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
for lr, node in sorted(fns):
err(lr, _("%s in manifests not found") % short(node), f)
ui.progress(_('checking'), None)
for f in storefiles:
warn(_("warning: orphan revlog '%s'") % f)
ui.status(_("%d files, %d changesets, %d total revisions\n") %
(len(files), len(cl), revisions))
if warnings[0]:
ui.warn(_("%d warnings encountered!\n") % warnings[0])
if errors[0]:
ui.warn(_("%d integrity errors encountered!\n") % errors[0])
if badrevs:
ui.warn(_("(first damaged changeset appears to be %d)\n")
% min(badrevs))
return 1