##// END OF EJS Templates
copies: split the combination of the copies mapping in its own function...
copies: split the combination of the copies mapping in its own function. In some cases, this part takes up to 95% of the copy tracing, which itself takes about a hundred seconds. This poor performance comes from the fact that we keep duplicating and merging dictionaries that are mostly similar. I want to experiment with smarter native code to do this, so I need to isolate the function first.

File last commit:

r43376:d783f945 default
r44178:0cc91600 default
Show More
verify.py
625 lines | 22.6 KiB | text/x-python | PythonLexer
Matt Mackall
Move repo.verify
r2778 # verify.py - repository integrity checking for Mercurial
#
Thomas Arendsen Hein
Updated copyright notices and add "and others" to "hg version"
r4635 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
Matt Mackall
Move repo.verify
r2778 #
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Move repo.verify
r2778
Gregory Szorc
verify: use absolute_import
r25991 from __future__ import absolute_import
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
r17860 import os
Gregory Szorc
verify: use absolute_import
r25991
from .i18n import _
from .node import (
nullid,
short,
)
from . import (
error,
Pulkit Goyal
py3: use pycompat.bytestr() instead of str()...
r35603 pycompat,
Gregory Szorc
verify: use absolute_import
r25991 revlog,
util,
)
Matt Mackall
Move repo.verify
r2778
verify: introduce a notion of "level"...
r42331 VERIFY_DEFAULT = 0
verify: introduce an experimental --full flag...
r42332 VERIFY_FULL = 1
verify: introduce a notion of "level"...
r42331
Augie Fackler
formatting: blacken the codebase...
r43346
verify: introduce a notion of "level"...
def verify(repo, level=None):
    """verify the integrity of ``repo`` while holding the repository lock

    A ``verifier`` is built for ``repo`` with the requested ``level`` (``None``
    lets the verifier pick its default) and the value returned by its
    ``verify()`` method is returned unchanged.
    """
    with repo.lock():
        return verifier(repo, level).verify()
Matt Mackall
Use try/finally pattern to cleanup locks and transactions
r4915
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
def _normpath(f):
    """collapse every run of repeated slashes in the bytes path ``f``

    Under hg < 2.4, convert didn't sanitize paths properly, so a converted
    repo may contain repeated slashes.
    """
    # a single replace() only halves a run of slashes, so loop until no
    # doubled slash is left
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f
Augie Fackler
formatting: blacken the codebase...
r43346
Durham Goode
verify: move verify logic into a class...
r27443 class verifier(object):
verify: introduce a notion of "level"...
def __init__(self, repo, level=None):
    """set up the verification state for ``repo``

    - repo: the repository to verify (its unfiltered view is stored),
    - level: verification level; ``None`` selects ``VERIFY_DEFAULT``.
    """
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    if level is None:
        level = VERIFY_DEFAULT
    self._level = level
    # changelog revisions associated with at least one reported error
    self.badrevs = set()
    # running counters of reported errors and warnings
    self.errors = 0
    self.warnings = 0
    # whether the changelog / the root manifest storage hold any revision
    self.havecl = len(repo.changelog) > 0
    self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
    # True when the changelog is not stored in revlog format 0
    self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
    self.lrugetctx = util.lrucachefunc(repo.__getitem__)
    # set once a changeset referencing a non-null manifest has been seen
    self.refersmf = False
    self.fncachewarned = False
    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
    # when False, orphan store files are not reported as warnings
    self.warnorphanstorefiles = True
Durham Goode
verify: move widely used variables into class members...
r27444
verify: make the `warn` method private...
def _warn(self, msg):
    """record a "warning" level issue

    ``msg`` (bytes, without trailing newline) is written through
    ``self.ui.warn`` and the warning counter is incremented.
    """
    self.ui.warn(msg + b"\n")
    self.warnings += 1
Durham Goode
verify: move warn() to a class level function...
r27446
verify: make `err` a private method...
def _err(self, linkrev, msg, filename=None):
    """record an "error" level issue

    - linkrev: changelog revision the error relates to, or None if unknown;
      a known revision is added to ``self.badrevs``,
    - msg: bytes description of the problem,
    - filename: optional bytes label prepended to the message as
      ``filename@``.

    The formatted line is written through ``self.ui.warn`` and the error
    counter is incremented.
    """
    if linkrev is not None:
        self.badrevs.add(linkrev)
        revlabel = b"%d" % linkrev
    else:
        revlabel = b'?'
    msg = b"%s: %s" % (revlabel, msg)
    if filename:
        msg = b"%s@%s" % (filename, msg)
    self.ui.warn(b" " + msg + b"\n")
    self.errors += 1
Durham Goode
verify: move err() to be a class function...
r27447
verify: make the `exc` method private...
def _exc(self, linkrev, msg, inst, filename=None):
    """record exception raised during the verify process

    The exception ``inst`` is rendered with ``pycompat.bytestr``; when that
    yields an empty value its ``pycompat.byterepr`` is used instead. The
    result is appended to ``msg`` and reported through ``_err``.
    """
    fmsg = pycompat.bytestr(inst)
    if not fmsg:
        fmsg = pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
Durham Goode
verify: move exc() function onto class...
r27448
verify: rename the `checklog` to `_checkrevlog`...
def _checkrevlog(self, obj, name, linkrev):
    """verify high level property of a revlog

    - revlog is present,
    - revlog is non-empty,
    - sizes (index and data) are correct,
    - revlog's format version is correct.

    ``name`` is the bytes label used in messages and ``linkrev`` the
    changelog revision blamed when the revlog is empty or missing.
    """
    if not len(obj) and (self.havecl or self.havemf):
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    # checksize() reports (data length discrepancy, extra index bytes)
    d = obj.checksize()
    if d[0]:
        self._err(None, _(b"data length off by %d bytes") % d[0], name)
    if d[1]:
        self._err(None, _(b"index contains %d extra bytes") % d[1], name)

    # warn when this revlog's format disagrees with the changelog's
    if obj.version != revlog.REVLOGV0:
        if not self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 1") % name)
    elif self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 0") % name)
Durham Goode
verify: move checklog() onto class...
r27642
verify: make `checkentry` a private method...
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """verify a single revlog entry

    arguments are:
    - obj:      the source revlog
    - i:        the revision number
    - node:     the revision node id
    - seen:     nodes previously seen for this revlog ({node -> rev}),
                updated in place with this entry
    - linkrevs: [changelog-revisions] introducing "node"
    - f:        string label ("changelog", "manifest", or filename)

    Performs the following checks:
    - linkrev points to an existing changelog revision,
    - linkrev points to a changelog revision that introduces this revision,
    - both parents exist in the revlog,
    - the revision is not duplicated.

    Return the linkrev stored for the revision, or None when that linkrev
    failed validation and therefore cannot be trusted.
    """
    lr = obj.linkrev(obj.rev(node))
    if lr < 0 or (self.havecl and lr not in linkrevs):
        if lr < 0 or lr >= len(self.repo.changelog):
            msg = _(b"rev %d points to nonexistent changeset %d")
        else:
            msg = _(b"rev %d points to unexpected changeset %d")
        self._err(None, msg % (i, lr), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs
                    linkrevs = [
                        l
                        for l in linkrevs
                        if self.lrugetctx(l)[f].filenode() == node
                    ]
                except Exception:
                    # best effort only: on failure keep the unfiltered
                    # candidates for the hint below
                    pass
            self._warn(
                _(b" (expected %s)")
                % b" ".join(map(pycompat.bytestr, linkrevs))
            )
        lr = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 not in seen and p1 != nullid:
            self._err(
                lr,
                _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
                f,
            )
        if p2 not in seen and p2 != nullid:
            self._err(
                lr,
                _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
                f,
            )
    except Exception as inst:
        self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

    if node in seen:
        self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
    seen[node] = i
    return lr
Durham Goode
verify: move widely used variables into class members...
def verify(self):
    """verify the content of the Mercurial repository

    This method runs all verifications, displaying issues as they are found.

    Return 1 if any error has been encountered, 0 otherwise.

    Raises ``error.Abort`` when the repository URL does not start with
    ``file:``.
    """
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui

    if not repo.url().startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    if os.path.exists(repo.sjoin(b"journal")):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        ui.status(
            _(b"repository uses revlog format %d\n")
            % (1 if self.revlogv1 else 0)
        )

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # final report
    ui.status(
        _(b"checked %d changesets with %d changes to %d files\n")
        % (len(repo.changelog), filerevisions, totalfiles)
    )
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(
            _(
                b'hint: run "hg debugrebuildfncache" to recover from '
                b'corrupt fncache\n'
            )
        )
    if self.errors:
        ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
        if self.badrevs:
            ui.warn(
                _(b"(first damaged changeset appears to be %d)\n")
                % min(self.badrevs)
            )
        return 1
    return 0
Durham Goode
verify: move changelog verificaiton to its own function...
r27647
Martin von Zweigbergk
verify: replace "output parameters" by return values...
def _verifychangelog(self):
    """verify the changelog of a repository

    The following checks are performed:
    - all of `_checkrevlog` checks,
    - all of `_checkentry` checks (for each revision),
    - each revision can be read.

    The function returns some of the data observed in the changesets as a
    (mflinkrevs, filelinkrevs) tuple:
    - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
    - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

    filelinkrevs only contains files accepted by ``self.match``.
    """
    ui = self.ui
    repo = self.repo
    match = self.match
    cl = repo.changelog

    ui.status(_(b"checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self._checkrevlog(cl, b"changelog", 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'changesets'), total=len(repo)
    )
    for i in repo:
        progress.update(i)
        n = cl.node(i)
        self._checkentry(cl, i, n, seen, [i], b"changelog")

        try:
            changes = cl.read(n)
            # changes[0] is used as the manifest node, changes[3] as the
            # list of files touched by the changeset
            if changes[0] != nullid:
                mflinkrevs.setdefault(changes[0], []).append(i)
                self.refersmf = True
            for f in changes[3]:
                if match(f):
                    filelinkrevs.setdefault(_normpath(f), []).append(i)
        except Exception as inst:
            # an unreadable changeset may have referenced a manifest, so
            # keep manifest checking enabled
            self.refersmf = True
            self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
Matt Mackall
Move repo.verify
r2778
Augie Fackler
formatting: blacken the codebase...
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """verify the manifestlog content

    Inputs:
    - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping;
                      entries are deleted from it as nodes are found
    - dir:            a subdirectory to check (for tree manifest repo)
    - storefiles:     set of currently "orphan" files.
    - subdirprogress: a progress object

    This function checks:
    * all of `_checkrevlog` checks (for all manifest related revlogs)
    * all of `_checkentry` checks (for all manifest related revisions)
    * nodes for subdirectories exist in the sub-directory manifest
    * each manifest entry has a file path
    * each manifest node referred to in mflinkrevs exists in the manifest log

    If tree manifest is in use and a matcher is specified, only the
    sub-directories matching it will be verified.

    return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

    This mapping primarily contains entries for every file in the
    repository. In addition, when tree-manifest is used, it also contains
    sub-directory entries.

    If a matcher is provided, only matching paths will be included.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)

    if not dir:
        self.ui.status(_(b"checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    label = b"manifest"
    if dir:
        label = dir
        # files backing this directory's manifest are not orphans
        revlogfiles = mf.files()
        storefiles.difference_update(revlogfiles)
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(mf, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(mf)
    )
    for i in mf:
        if not dir:
            progress.update(i)
        n = mf.node(i)
        lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
        if n in mflinkrevs:
            del mflinkrevs[n]
        elif dir:
            self._err(
                lr,
                _(b"%s not in parent-directory manifest") % short(n),
                label,
            )
        else:
            self._err(lr, _(b"%s not in changesets") % short(n), label)

        try:
            mfdelta = mfl.get(dir, n).readdelta(shallow=True)
            for f, fn, fl in mfdelta.iterentries():
                if not f:
                    self._err(lr, _(b"entry without name in manifest"))
                elif f == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(f)
                if fl == b't':
                    # sub-directory entry: remember its node for the
                    # recursive check below
                    if not match.visitdir(fullpath):
                        continue
                    subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                        fn, []
                    ).append(lr)
                else:
                    if not match(fullpath):
                        continue
                    filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
        except Exception as inst:
            self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                mfl.get(dir, n).read()
            except Exception as inst:
                self._exc(
                    lr,
                    _(b"reading full manifest %s") % short(n),
                    inst,
                    label,
                )

    if not dir:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
        for c, m in sorted(changesetpairs):
            if dir:
                self._err(
                    c,
                    _(
                        b"parent-directory manifest refers to unknown"
                        b" revision %s"
                    )
                    % short(m),
                    label,
                )
            else:
                self._err(
                    c,
                    _(b"changeset refers to unknown revision %s")
                    % short(m),
                    label,
                )

    if not dir and subdirnodes:
        self.ui.status(_(b"checking directory manifests\n"))
        # start from every non-empty 'meta/' store file; the recursive
        # calls remove the ones that belong to a known directory manifest
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                storefiles.add(_normpath(f))
                subdirs.add(os.path.dirname(f))
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

    for subdir, linkrevs in pycompat.iteritems(subdirnodes):
        subdirfilenodes = self._verifymanifest(
            linkrevs, subdir, storefiles, subdirprogress
        )
        for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
            filenodes.setdefault(f, {}).update(onefilenodes)

    if not dir and subdirnodes:
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

    return filenodes
Durham Goode
verify: move file cross checking to its own function...
r27645
Martin von Zweigbergk
verify: move cross-checking of changeset/manifest out of _crosscheckfiles()...
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """report files seen in only one of the changelog and the manifests

    - filelinkrevs: a { file-path -> [changelog-rev] } mapping
    - filenodes:    a { file-path -> { filenode -> changelog-rev } } mapping

    A file present in ``filelinkrevs`` but not in ``filenodes`` is reported
    as "in changeset but not in manifest" (only when manifests exist); the
    opposite case is reported as "in manifest but not in changeset" (only
    when the changelog is non-empty).
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    total = len(filelinkrevs) + len(filenodes)
    progress = ui.makeprogress(
        _(b'crosschecking'), unit=_(b'files'), total=total
    )
    if self.havemf:
        for f in sorted(filelinkrevs):
            progress.increment()
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                self._err(lr, _(b"in changeset but not in manifest"), f)

    if self.havecl:
        for f in sorted(filenodes):
            progress.increment()
            if f not in filelinkrevs:
                try:
                    fl = repo.file(f)
                    lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[f])
                except Exception:
                    # the filelog may be missing or unreadable; report the
                    # error without a linkrev in that case
                    lr = None
                self._err(lr, _(b"in manifest but not in changeset"), f)

    progress.complete()
Henrik Stuart
verify: avoid exception on missing file revlog...
r8291
Durham Goode
verify: move filelog verification to its own function...
def _verifyfiles(self, filenodes, filelinkrevs):
    """Verify every tracked file's storage and cross-check it.

    ``filenodes`` is indexed by file name and yields a mapping of file
    node to linkrev. Entries are deleted from the inner mappings as the
    matching revisions are found in storage, so the argument is mutated.
    ``filelinkrevs`` is indexed by file name and yields a sequence of
    linkrevs; a missing file name is treated as an empty sequence.

    For each file name present in either argument (in sorted order) this:

    * opens the file storage, reporting a broken revlog and skipping the
      file on ``error.StorageError``;
    * removes the storage's backing files from the set of store data
      files, warning about ones that were not listed there;
    * runs ``verifyintegrity()`` and relays its warnings and errors;
    * checks every revision with ``self._checkentry()``, against
      ``filenodes``, and validates its copy/rename source;
    * reports nodes left over in ``filenodes`` as unknown revisions.

    Store data files never claimed by any file are reported as orphans
    when ``self.warnorphanstorefiles`` is set.

    Returns a ``(number of files, number of revisions)`` tuple.
    """
    repo = self.repo
    ui = self.ui
    lrugetctx = self.lrugetctx
    revlogv1 = self.revlogv1
    havemf = self.havemf
    ui.status(_(b"checking files\n"))

    # Collect the data files the store knows about, so that files not
    # claimed by any filelog below can be reported as orphans.
    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            self._err(None, _(b"cannot decode filename '%s'") % f2)
        elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
            storefiles.add(_normpath(f))

    state = {
        # TODO this assumes revlog storage for changelog.
        b'expectedversion': self.repo.changelog.version & 0xFFFF,
        b'skipflags': self.skipflags,
        # experimental config: censor.policy
        b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
    }

    files = sorted(set(filenodes) | set(filelinkrevs))
    revisions = 0
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'files'), total=len(files)
    )
    for i, f in enumerate(files):
        progress.update(i, item=f)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        # Default linkrev used for reporting until a revision-specific
        # one is known.
        lr = linkrevs[0] if linkrevs else None

        try:
            fl = repo.file(f)
        except error.StorageError as e:
            self._err(lr, _(b"broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                if self.warnorphanstorefiles:
                    self._warn(
                        _(b" warning: revlog '%s' not in fncache!") % ff
                    )
                    self.fncachewarned = True

        if not len(fl) and (self.havecl or self.havemf):
            self._err(lr, _(b"empty or missing %s") % f)
        else:
            # Guard against implementations not setting this.
            state[b'skipread'] = set()
            for problem in fl.verifyintegrity(state):
                if problem.node is not None:
                    linkrev = fl.linkrev(fl.rev(problem.node))
                else:
                    linkrev = None

                if problem.warning:
                    self._warn(problem.warning)
                elif problem.error:
                    self._err(
                        linkrev if linkrev is not None else lr,
                        problem.error,
                        f,
                    )
                else:
                    raise error.ProgrammingError(
                        b'problem instance does not set warning or error '
                        b'attribute: %s' % problem.msg
                    )

        seen = {}
        for rev in fl:
            revisions += 1
            n = fl.node(rev)
            lr = self._checkentry(fl, rev, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                else:
                    # Accounted for; whatever remains afterwards is
                    # reported by the cross-check below.
                    del filenodes[f][n]

            if n in state[b'skipread']:
                continue

            # check renames
            try:
                # This requires resolving fulltext (at least on revlogs). We
                # may want ``verifyintegrity()`` to pass a set of nodes with
                # rename metadata as an optimization.
                rp = fl.renamed(n)
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        if not any(rp[0] in pctx for pctx in ctx.parents()):
                            self._warn(
                                _(
                                    b"warning: copy source of '%s' not"
                                    b" in parents of %s"
                                )
                                % (f, ctx)
                            )
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        self._err(
                            lr,
                            _(
                                b"empty or missing copy source revlog "
                                b"%s:%s"
                            )
                            % (rp[0], short(rp[1])),
                            f,
                        )
                    elif rp[1] == nullid:
                        # lr is a linkrev (presumably an int) or None.
                        # On Python 3 neither can be interpolated into
                        # bytes with %s, which would raise TypeError and
                        # be misreported as a rename-check failure by
                        # the handler below, so convert it explicitly.
                        ui.note(
                            _(
                                b"warning: %s@%s: copy source"
                                b" revision is nullid %s:%s\n"
                            )
                            % (
                                f,
                                pycompat.bytestr(lr),
                                rp[0],
                                short(rp[1]),
                            )
                        )
                    else:
                        # Only the lookup matters: it raises if the copy
                        # source revision is unknown.
                        fl2.rev(rp[1])
            except Exception as inst:
                self._exc(
                    lr, _(b"checking rename of %s") % short(n), inst, f
                )

        # cross-check
        if f in filenodes:
            fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
            for lr, node in sorted(fns):
                self._err(
                    lr,
                    _(b"manifest refers to unknown revision %s")
                    % short(node),
                    f,
                )
    progress.complete()

    if self.warnorphanstorefiles:
        for f in sorted(storefiles):
            self._warn(_(b"warning: orphan data file '%s'") % f)

    return len(files), revisions