##// END OF EJS Templates
compression: introduce an official `format.revlog-compression` option...
compression: introduce an official `format.revlog-compression` option. This option supersedes the `experiment.format.compression` option. The values currently supported are zlib (default) and zstd (if Mercurial was compiled with zstd support). The option gained an explicit reference to `revlog` since this is the target usage here. Different storage methods might require different compression strategies. In our tests, using zstd gives a significant CPU usage improvement (both compressing and decompressing) while keeping a similar repository size. Zstd has other interesting modes (dictionary, pre-text, etc.) that are probably worth exploring. However, just plain switching from zlib to zstd provides a large benefit.

File last commit:

r42043:9c5a6af7 default
r42213:4ee906aa default
Show More
verify.py
535 lines | 20.5 KiB | text/x-python | PythonLexer
Matt Mackall
Move repo.verify
r2778 # verify.py - repository integrity checking for Mercurial
#
Thomas Arendsen Hein
Updated copyright notices and add "and others" to "hg version"
r4635 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
Matt Mackall
Move repo.verify
r2778 #
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Move repo.verify
r2778
Gregory Szorc
verify: use absolute_import
r25991 from __future__ import absolute_import
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
r17860 import os
Gregory Szorc
verify: use absolute_import
r25991
from .i18n import _
from .node import (
nullid,
short,
)
from . import (
error,
Pulkit Goyal
py3: use pycompat.bytestr() instead of str()...
r35603 pycompat,
Gregory Szorc
verify: use absolute_import
r25991 revlog,
util,
)
Matt Mackall
Move repo.verify
r2778
def verify(repo):
    """Run a full integrity check of ``repo`` while holding its lock.

    The object returned by ``repo.lock()`` is used as a context manager around
    the whole check. The return value is whatever
    ``verifier(repo).verify()`` returns.
    """
    lock = repo.lock()
    with lock:
        checker = verifier(repo)
        return checker.verify()
Matt Mackall
Use try/finally pattern to cleanup locks and transactions
r4915
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
def _normpath(f):
    """Return ``f`` with every run of repeated slashes collapsed to one."""
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    collapsed = f
    while True:
        if '//' not in collapsed:
            return collapsed
        collapsed = collapsed.replace('//', '/')
Durham Goode
verify: move verify logic into a class...
r27443 class verifier(object):
Martin von Zweigbergk
narrow: move support for `hg verify` into core...
def __init__(self, repo):
    """Capture the state shared by every verification pass.

    ``repo`` is the repository to check; its unfiltered view is stored as
    ``self.repo`` while the ui, narrow matcher, changelog and manifest
    storage are all read from ``repo`` itself.
    """
    # plain bookkeeping, independent of the repository content
    self.badrevs = set()
    self.errors = 0
    self.warnings = 0
    self.refersmf = False
    self.fncachewarned = False
    self.warnorphanstorefiles = True

    # repository-derived state
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    self.havecl = len(repo.changelog) != 0
    self.havemf = len(repo.manifestlog.getstorage(b'')) != 0
    self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
    self.lrugetctx = util.lrucachefunc(repo.__getitem__)
    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint('verify', 'skipflags')
Durham Goode
verify: move widely used variables into class members...
r27444
verify: make the `warn` method private...
r42028 def _warn(self, msg):
verify: document the `warn` method...
r42027 """record a "warning" level issue"""
Durham Goode
verify: move warn() to a class level function...
r27446 self.ui.warn(msg + "\n")
Matt Mackall
verify: clean up weird error/warning lists...
r27453 self.warnings += 1
Durham Goode
verify: move warn() to a class level function...
r27446
verify: make `err` a private method...
r42030 def _err(self, linkrev, msg, filename=None):
verify: document the `err` method...
r42029 """record a "error" level issue"""
Durham Goode
verify: move err() to be a class function...
r27447 if linkrev is not None:
self.badrevs.add(linkrev)
Pulkit Goyal
py3: use "%d" to convert integers to bytes...
r36203 linkrev = "%d" % linkrev
Durham Goode
verify: move err() to be a class function...
r27447 else:
linkrev = '?'
msg = "%s: %s" % (linkrev, msg)
if filename:
msg = "%s@%s" % (filename, msg)
self.ui.warn(" " + msg + "\n")
Matt Mackall
verify: clean up weird error/warning lists...
r27453 self.errors += 1
Durham Goode
verify: move err() to be a class function...
r27447
verify: make the `exc` method private...
def _exc(self, linkrev, msg, inst, filename=None):
    """Report an exception raised during verification as an error.

    The exception ``inst`` is rendered with ``pycompat.bytestr``; when that
    yields an empty value its ``pycompat.byterepr`` is used instead. The
    result is appended to ``msg`` and forwarded to ``_err``.
    """
    detail = pycompat.bytestr(inst) or pycompat.byterepr(inst)
    self._err(linkrev, "%s: %s" % (msg, detail), filename)
Durham Goode
verify: move exc() function onto class...
r27448
verify: rename the `checklog` to `_checkrevlog`...
def _checkrevlog(self, obj, name, linkrev):
    """verify high level property of a revlog

    - revlog is present,
    - revlog is non-empty,
    - sizes (index and data) are correct,
    - revlog's format version is correct.

    arguments are:
    - obj: the revlog to check
    - name: label used in the reported messages
    - linkrev: revision the "empty or missing" error is attributed to

    Size discrepancies are reported as errors without a linkrev; a format
    version differing from the changelog's is reported as a warning.
    """
    if not len(obj) and (self.havecl or self.havemf):
        self._err(linkrev, _("empty or missing %s") % name)
        return

    d = obj.checksize()
    # report through the private `_err` helper: the class defines no
    # public `err` method, so calling `self.err` here raised
    # AttributeError exactly when a size problem needed reporting
    if d[0]:
        self._err(None, _("data length off by %d bytes") % d[0], name)
    if d[1]:
        self._err(None, _("index contains %d extra bytes") % d[1], name)

    if obj.version != revlog.REVLOGV0:
        if not self.revlogv1:
            self._warn(_("warning: `%s' uses revlog format 1") % name)
    elif self.revlogv1:
        self._warn(_("warning: `%s' uses revlog format 0") % name)
Durham Goode
verify: move checklog() onto class...
r27642
verify: make `checkentry` a private method...
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """verify a single revlog entry

    arguments are:
    - obj: the source revlog
    - i: the revision number
    - node: the revision node id
    - seen: {node -> revision number} of entries already checked for this
      revlog; updated with ``node``
    - linkrevs: [changelog-revisions] introducing "node"
    - f: string label ("changelog", "manifest", or filename)

    Checks performed:
    - the linkrev is not negative and, when the repository has a
      changelog, is one of ``linkrevs``,
    - both parents are the null node or were already seen,
    - the node was not already seen.

    Return the linkrev read from the revlog, or None when it failed the
    first check.
    """
    linkrev = obj.linkrev(obj.rev(node))
    if linkrev < 0 or (self.havecl and linkrev not in linkrevs):
        if linkrev < 0 or linkrev >= len(self.repo.changelog):
            template = _("rev %d points to nonexistent changeset %d")
        else:
            template = _("rev %d points to unexpected changeset %d")
        self._err(None, template % (i, linkrev), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs
                    linkrevs = [
                        candidate for candidate in linkrevs
                        if self.lrugetctx(candidate)[f].filenode() == node
                    ]
                except Exception:
                    # best effort only: keep the unfiltered list
                    pass
            expected = " ".join(map(pycompat.bytestr, linkrevs))
            self._warn(_(" (expected %s)") % expected)
        linkrev = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if not (p1 in seen or p1 == nullid):
            self._err(linkrev, _("unknown parent 1 %s of %s") %
                      (short(p1), short(node)), f)
        if not (p2 in seen or p2 == nullid):
            self._err(linkrev, _("unknown parent 2 %s of %s") %
                      (short(p2), short(node)), f)
    except Exception as inst:
        self._exc(linkrev, _("checking parents of %s") % short(node),
                  inst, f)

    if node in seen:
        duplicate = _("duplicate revision %d (%d)") % (i, seen[node])
        self._err(linkrev, duplicate, f)
    seen[node] = i
    return linkrev
Durham Goode
verify: move widely used variables into class members...
def verify(self):
    """verify the content of the Mercurial repository

    Runs the changelog, manifest, cross-check and file passes in that
    order, reporting issues through the ui as they are found, then prints
    a summary.

    Raises error.Abort when the repository url does not start with
    'file:'. Return 1 if any error has been encountered, 0 otherwise.
    """
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui

    url = repo.url()
    if not url.startswith('file:'):
        raise error.Abort(_("cannot verify bundle or remote repos"))

    journal = repo.sjoin("journal")
    if os.path.exists(journal):
        ui.warn(_("abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        formatversion = 1 if self.revlogv1 else 0
        ui.status(_("repository uses revlog format %d\n") % formatversion)

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # final report
    summary = (len(repo.changelog), filerevisions, totalfiles)
    ui.status(_("checked %d changesets with %d changes to %d files\n") %
              summary)
    if self.warnings:
        ui.warn(_("%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                  'corrupt fncache\n'))
    if not self.errors:
        return 0
    ui.warn(_("%d integrity errors encountered!\n") % self.errors)
    if self.badrevs:
        ui.warn(_("(first damaged changeset appears to be %d)\n")
                % min(self.badrevs))
    return 1
Durham Goode
verify: move changelog verificaiton to its own function...
r27647
Martin von Zweigbergk
verify: replace "output parameters" by return values...
def _verifychangelog(self):
    """verify the changelog of a repository

    The following checks are performed:
    - all of `_checkrevlog` checks,
    - all of `_checkentry` checks (for each revision),
    - each revision can be read.

    Returns a (mflinkrevs, filelinkrevs) tuple built from the changesets
    that could be read:
    - mflinkrevs: a { manifest-node -> [changelog-rev] } mapping (null
      manifest nodes are left out)
    - filelinkrevs: a { file-path -> [changelog-rev] } mapping, limited to
      the paths accepted by ``self.match``

    ``self.refersmf`` is set as soon as a changeset refers to a non-null
    manifest or cannot be read.
    """
    ui = self.ui
    repo = self.repo
    matcher = self.match
    changelog = repo.changelog

    ui.status(_("checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self._checkrevlog(changelog, "changelog", 0)
    progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                               total=len(repo))
    for rev in repo:
        progress.update(rev)
        node = changelog.node(rev)
        self._checkentry(changelog, rev, node, seen, [rev], "changelog")

        try:
            changes = changelog.read(node)
            manifestnode = changes[0]
            if manifestnode != nullid:
                mflinkrevs.setdefault(manifestnode, []).append(rev)
                self.refersmf = True
            for path in changes[3]:
                if not matcher(path):
                    continue
                filelinkrevs.setdefault(_normpath(path), []).append(rev)
        except Exception as inst:
            self.refersmf = True
            self._exc(rev, _("unpacking changeset %s") % short(node), inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
Matt Mackall
Move repo.verify
r2778
Martin von Zweigbergk
verify: show progress while verifying dirlogs...
def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                    subdirprogress=None):
    """verify the manifestlog content

    Inputs:
    - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping;
      entries found in the manifest storage are deleted from it
    - dir: the sub-directory to check ("" for the root manifest)
    - storefiles: set of store files not yet accounted for; only used
      when ``dir`` is set
    - subdirprogress: progress object incremented once per sub-directory

    This function checks:
    * all of `_checkrevlog` checks (skipped while ``self.refersmf`` is
      false)
    * all of `_checkentry` checks, for every revision of the storage
    * every manifest revision is referenced by ``mflinkrevs``
    * every manifest entry has a name
    * every node left in ``mflinkrevs`` after the walk is reported as
      unknown (only when ``self.havemf`` is true)

    Sub-directory entries (flag 't') accepted by the matcher's
    ``visitdir`` are verified recursively; the root call also warns about
    'meta/' store files that no sub-directory claimed.

    Return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}
    restricted to paths accepted by ``self.match``, including the entries
    returned by the recursive calls.
    """
    repo = self.repo
    ui = self.ui
    matcher = self.match
    manifestlog = self.repo.manifestlog
    storage = manifestlog.getstorage(dir)
    toplevel = not dir

    if toplevel:
        self.ui.status(_("checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    if toplevel:
        label = "manifest"
    else:
        label = dir
        storefiles.difference_update(storage.files())
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(storage, label, 0)
    progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                               total=len(storage))
    for rev in storage:
        if toplevel:
            progress.update(rev)
        node = storage.node(rev)
        expected = mflinkrevs.get(node, [])
        lr = self._checkentry(storage, rev, node, seen, expected, label)
        if node in mflinkrevs:
            del mflinkrevs[node]
        elif toplevel:
            self._err(lr, _("%s not in changesets") % short(node), label)
        else:
            self._err(lr, _("%s not in parent-directory manifest") %
                      short(node), label)

        try:
            delta = manifestlog.get(dir, node).readdelta(shallow=True)
            for name, entrynode, flag in delta.iterentries():
                if not name:
                    # reported, but the entry is still processed below
                    self._err(lr, _("entry without name in manifest"))
                elif name == "/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(name)
                if flag == 't':
                    if matcher.visitdir(fullpath):
                        bynode = subdirnodes.setdefault(fullpath + '/', {})
                        bynode.setdefault(entrynode, []).append(lr)
                elif matcher(fullpath):
                    filenodes.setdefault(fullpath, {}).setdefault(
                        entrynode, lr)
        except Exception as inst:
            self._exc(lr, _("reading delta %s") % short(node), inst, label)
    if toplevel:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        missing = sorted((clrev, mnode)
                         for mnode in mflinkrevs
                         for clrev in mflinkrevs[mnode])
        for clrev, mnode in missing:
            if toplevel:
                self._err(clrev,
                          _("changeset refers to unknown revision %s") %
                          short(mnode), label)
            else:
                self._err(clrev,
                          _("parent-directory manifest refers to unknown"
                            " revision %s") % short(mnode), label)

    # only the root call sets up (and later reports on) the store files
    walksubdirs = toplevel and bool(subdirnodes)
    if walksubdirs:
        self.ui.status(_("checking directory manifests\n"))
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                storefiles.add(_normpath(f))
                subdirs.add(os.path.dirname(f))
        subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                         total=len(subdirs))

    for subdir, linkrevs in subdirnodes.iteritems():
        nested = self._verifymanifest(linkrevs, subdir, storefiles,
                                      subdirprogress)
        for path, nodes in nested.iteritems():
            filenodes.setdefault(path, {}).update(nodes)

    if walksubdirs:
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            for orphan in sorted(storefiles):
                self._warn(_("warning: orphan data file '%s'") % orphan)

    return filenodes
Durham Goode
verify: move file cross checking to its own function...
r27645
Martin von Zweigbergk
verify: move cross-checking of changeset/manifest out of _crosscheckfiles()...
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """Compare the files named by changesets with those in manifests.

    - filelinkrevs: { file-path -> [changelog-rev] } from the changelog
    - filenodes: { file-path -> { filenode -> linkrev } } from manifests

    When the repository has manifests, each path present only in
    ``filelinkrevs`` is reported as an error. When it has a changelog,
    each path present only in ``filenodes`` is reported as an error,
    attributed to the lowest linkrev of its nodes when that can be
    computed.
    """
    repo = self.repo
    ui = self.ui
    ui.status(_("crosschecking files in changesets and manifests\n"))

    total = len(filelinkrevs) + len(filenodes)
    progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
                               total=total)
    if self.havemf:
        for path in sorted(filelinkrevs):
            progress.increment()
            if path in filenodes:
                continue
            firstrev = filelinkrevs[path][0]
            self._err(firstrev, _("in changeset but not in manifest"), path)

    if self.havecl:
        for path in sorted(filenodes):
            progress.increment()
            if path in filelinkrevs:
                continue
            try:
                filelog = repo.file(path)
                lr = min(filelog.linkrev(filelog.rev(node))
                         for node in filenodes[path])
            except Exception:
                # the filelog may be unusable; report without a linkrev
                lr = None
            self._err(lr, _("in manifest but not in changeset"), path)

    progress.complete()
Henrik Stuart
verify: avoid exception on missing file revlog...
r8291
Durham Goode
verify: move filelog verification to its own function...
r27644 def _verifyfiles(self, filenodes, filelinkrevs):
repo = self.repo
ui = self.ui
lrugetctx = self.lrugetctx
revlogv1 = self.revlogv1
havemf = self.havemf
Durham Goode
verify: move verify logic into a class...
r27443 ui.status(_("checking files\n"))
Henrik Stuart
verify: avoid exception on missing file revlog...
r8291
Durham Goode
verify: move verify logic into a class...
r27443 storefiles = set()
for f, f2, size in repo.store.datafiles():
if not f:
verify: make `err` a private method...
r42030 self._err(None, _("cannot decode filename '%s'") % f2)
Martin von Zweigbergk
treemanifests: fix streaming clone...
r28007 elif (size > 0 or not revlogv1) and f.startswith('data/'):
Durham Goode
verify: move verify logic into a class...
r27443 storefiles.add(_normpath(f))
Adrian Buehlmann
verify: check repo.store
r6892
Gregory Szorc
verify: start to abstract file verification...
r39878 state = {
Gregory Szorc
revlog: use proper version comparison during verify...
r39881 # TODO this assumes revlog storage for changelog.
Gregory Szorc
revlog: move revision verification out of verify...
r39908 'expectedversion': self.repo.changelog.version & 0xFFFF,
'skipflags': self.skipflags,
# experimental config: censor.policy
'erroroncensored': ui.config('censor', 'policy') == 'abort',
Gregory Szorc
verify: start to abstract file verification...
r39878 }
Durham Goode
verify: move verify logic into a class...
r27443 files = sorted(set(filenodes) | set(filelinkrevs))
Durham Goode
verify: move filelog verification to its own function...
r27644 revisions = 0
Martin von Zweigbergk
verify: use progress helper...
r38416 progress = ui.makeprogress(_('checking'), unit=_('files'),
total=len(files))
Durham Goode
verify: move verify logic into a class...
r27443 for i, f in enumerate(files):
Martin von Zweigbergk
verify: use progress helper...
r38416 progress.update(i, item=f)
Adrian Buehlmann
verify: check repo.store
r6892 try:
Durham Goode
verify: move verify logic into a class...
r27443 linkrevs = filelinkrevs[f]
Adrian Buehlmann
verify: check repo.store
r6892 except KeyError:
Durham Goode
verify: move verify logic into a class...
r27443 # in manifest but not in changelog
linkrevs = []
Matt Mackall
Move repo.verify
r2778
Durham Goode
verify: move verify logic into a class...
r27443 if linkrevs:
lr = linkrevs[0]
else:
lr = None
Matt Mackall
Move repo.verify
r2778
Matt Mackall
verify: add rename link checking
r3744 try:
Durham Goode
verify: move verify logic into a class...
r27443 fl = repo.file(f)
Gregory Szorc
global: replace most uses of RevlogError with StorageError (API)...
r39813 except error.StorageError as e:
verify: make `err` a private method...
r42030 self._err(lr, _("broken revlog! (%s)") % e, f)
Durham Goode
verify: move verify logic into a class...
r27443 continue
for ff in fl.files():
try:
storefiles.remove(ff)
except KeyError:
Gregory Szorc
verify: allow suppressing warnings about extra files...
r37435 if self.warnorphanstorefiles:
verify: make the `warn` method private...
r42028 self._warn(_(" warning: revlog '%s' not in fncache!") %
Gregory Szorc
verify: allow suppressing warnings about extra files...
r37435 ff)
self.fncachewarned = True
Durham Goode
verify: move verify logic into a class...
r27443
Gregory Szorc
verify: start to abstract file verification...
r39878 if not len(fl) and (self.havecl or self.havemf):
verify: make `err` a private method...
r42030 self._err(lr, _("empty or missing %s") % f)
Gregory Szorc
verify: start to abstract file verification...
r39878 else:
Gregory Szorc
revlog: move revision verification out of verify...
r39908 # Guard against implementations not setting this.
state['skipread'] = set()
Gregory Szorc
verify: start to abstract file verification...
r39878 for problem in fl.verifyintegrity(state):
Gregory Szorc
revlog: move revision verification out of verify...
r39908 if problem.node is not None:
linkrev = fl.linkrev(fl.rev(problem.node))
else:
linkrev = None
Gregory Szorc
verify: start to abstract file verification...
r39878 if problem.warning:
verify: make the `warn` method private...
r42028 self._warn(problem.warning)
Gregory Szorc
verify: start to abstract file verification...
r39878 elif problem.error:
verify: make `err` a private method...
r42030 self._err(linkrev if linkrev is not None else lr,
problem.error, f)
Gregory Szorc
verify: start to abstract file verification...
r39878 else:
raise error.ProgrammingError(
'problem instance does not set warning or error '
'attribute: %s' % problem.msg)
Durham Goode
verify: move verify logic into a class...
r27443 seen = {}
for i in fl:
revisions += 1
n = fl.node(i)
verify: make `checkentry` a private method...
r42037 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
Durham Goode
verify: move verify logic into a class...
r27443 if f in filenodes:
if havemf and n not in filenodes[f]:
verify: make `err` a private method...
r42030 self._err(lr, _("%s not in manifests") % (short(n)), f)
Patrick Mezard
verify: check copy source revlog and nodeid
r6534 else:
Durham Goode
verify: move verify logic into a class...
r27443 del filenodes[f][n]
Gregory Szorc
revlog: move revision verification out of verify...
r39908 if n in state['skipread']:
continue
Matt Mackall
verify: add rename link checking
r3744
Durham Goode
verify: move verify logic into a class...
r27443 # check renames
try:
Gregory Szorc
revlog: move revision verification out of verify...
r39908 # This requires resolving fulltext (at least on revlogs). We
# may want ``verifyintegrity()`` to pass a set of nodes with
# rename metadata as an optimization.
rp = fl.renamed(n)
Durham Goode
verify: move verify logic into a class...
r27443 if rp:
if lr is not None and ui.verbose:
ctx = lrugetctx(lr)
Martin von Zweigbergk
verify: don't reimplement any()...
r36357 if not any(rp[0] in pctx for pctx in ctx.parents()):
verify: make the `warn` method private...
r42028 self._warn(_("warning: copy source of '%s' not"
Durham Goode
verify: move warn() to a class level function...
r27446 " in parents of %s") % (f, ctx))
Durham Goode
verify: move verify logic into a class...
r27443 fl2 = repo.file(rp[0])
if not len(fl2):
verify: make `err` a private method...
r42030 self._err(lr,
_("empty or missing copy source revlog "
"%s:%s") % (rp[0],
short(rp[1])),
f)
Durham Goode
verify: move verify logic into a class...
r27443 elif rp[1] == nullid:
ui.note(_("warning: %s@%s: copy source"
" revision is nullid %s:%s\n")
% (f, lr, rp[0], short(rp[1])))
else:
fl2.rev(rp[1])
except Exception as inst:
verify: make the `exc` method private...
r42032 self._exc(lr, _("checking rename of %s") % short(n),
inst, f)
Adrian Buehlmann
verify: check repo.store
r6892
Durham Goode
verify: move verify logic into a class...
r27443 # cross-check
if f in filenodes:
Augie Fackler
verify: avoid shadowing two variables with a list comprehension...
r30393 fns = [(v, k) for k, v in filenodes[f].iteritems()]
Durham Goode
verify: move verify logic into a class...
r27443 for lr, node in sorted(fns):
verify: make `err` a private method...
r42030 self._err(lr, _("manifest refers to unknown revision %s") %
short(node), f)
Martin von Zweigbergk
verify: use progress helper...
r38416 progress.complete()
Durham Goode
verify: move verify logic into a class...
r27443
Gregory Szorc
verify: allow suppressing warnings about extra files...
r37435 if self.warnorphanstorefiles:
for f in sorted(storefiles):
verify: make the `warn` method private...
r42028 self._warn(_("warning: orphan data file '%s'") % f)
Durham Goode
verify: move verify logic into a class...
r27443
Durham Goode
verify: move filelog verification to its own function...
r27644 return len(files), revisions