##// END OF EJS Templates
config: gather constant and type into the `__init__.py`...
config: gather constant and type into the `__init__.py` This will help using them in multiple files.

File last commit:

r52756:f4733654 default
r53322:3e79ca01 default
Show More
verify.py
630 lines | 23.3 KiB | text/x-python | PythonLexer
Matt Mackall
Move repo.verify
r2778 # verify.py - repository integrity checking for Mercurial
#
Raphaël Gomès
contributor: change mentions of mpm to olivia...
r47575 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
Matt Mackall
Move repo.verify
r2778 #
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Move repo.verify
r2778
Matt Harbison
typing: add `from __future__ import annotations` to most files...
r52756 from __future__ import annotations
Gregory Szorc
verify: use absolute_import
r25991
Bryan O'Sullivan
verify: fix all doubled-slash sites (issue3665)
r17860 import os
Gregory Szorc
verify: use absolute_import
r25991
from .i18n import _
Joerg Sonnenberger
node: replace nullid and friends with nodeconstants class...
r47771 from .node import short
from .utils import stringutil
Gregory Szorc
verify: use absolute_import
r25991
from . import (
error,
Pulkit Goyal
py3: use pycompat.bytestr() instead of str()...
r35603 pycompat,
Raphaël Gomès
verify: also check dirstate...
r50721 requirements,
Gregory Szorc
verify: use absolute_import
r25991 revlog,
Raphaël Gomès
transaction: abstract away the detection of an abandoned transaction...
r51881 transaction,
Gregory Szorc
verify: use absolute_import
r25991 util,
)
Matt Mackall
Move repo.verify
r2778
verify: introduce a notion of "level"...
# Verification levels accepted by `verify()` and `verifier`.
VERIFY_DEFAULT = 0  # regular checks only
VERIFY_FULL = 1  # also read every full manifest text (see `_verifymanifest`)
verify: introduce a notion of "level"...
r42331
Augie Fackler
formatting: blacken the codebase...
r43346
verify: introduce a notion of "level"...
def verify(repo, level=None):
    """Verify ``repo`` while holding the repository lock.

    ``level`` is handed unchanged to ``verifier`` (``None`` selects
    ``VERIFY_DEFAULT`` there).  The result of ``verifier.verify()`` is
    returned as is: 1 if any error was encountered, 0 otherwise.
    """
    with repo.lock():
        return verifier(repo, level).verify()
Matt Mackall
Use try/finally pattern to cleanup locks and transactions
r4915
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add type hints to `mercurial.verify._normpath()`...
def _normpath(f: bytes) -> bytes:
    """Collapse every run of consecutive slashes in ``f`` into a single one.

    Under hg < 2.4, convert didn't sanitize paths properly, so a converted
    repo may contain repeated slashes.
    """
    # One replace() pass only halves a run of slashes, so repeat until no
    # doubled slash is left.
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f
Augie Fackler
formatting: blacken the codebase...
r43346
verify: use some intermediate variables instead of a multi-liner...
# Messages reused by the verifier.  Each one is passed through `_()` (imported
# from the i18n module) once, at import time.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)
WARN_UNKNOWN_COPY_SOURCE = _(
    b"warning: copy source of '%s' not in parents of %s"
)
WARN_NULLID_COPY_SOURCE = _(
    b"warning: %s@%s: copy source revision is nullid %s:%s\n"
)
verify: use some intermediate variables instead of a multi-liner...
r48147
Gregory Szorc
py3: use class X: instead of class X(object):...
r49801 class verifier:
verify: introduce a notion of "level"...
def __init__(self, repo, level=None):
    """Capture the repository state shared by all verification passes.

    - repo: the repository to verify; its unfiltered view is what is kept
    - level: VERIFY_DEFAULT or VERIFY_FULL (None means VERIFY_DEFAULT)
    """
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    self._level = VERIFY_DEFAULT if level is None else level

    # changelog revisions that `_err()` was given as linkrev
    self.badrevs = set()
    # counters bumped by `_err()` and `_warn()` respectively
    self.errors = 0
    self.warnings = 0

    self.havecl = len(repo.changelog) > 0
    self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
    # True unless the changelog uses revlog format 0
    self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
    # cached `repo[...]` lookups on the unfiltered repository
    self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)

    # turned on by `_verifychangelog` when a changeset references a
    # non-null manifest (or cannot be read at all)
    self.refersmf = False
    # when True, the final report prints the fncache recovery hint
    self.fncachewarned = False
    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
    # when False, orphan store files are not reported as warnings
    self.warnorphanstorefiles = True
Durham Goode
verify: move widely used variables into class members...
r27444
verify: make the `warn` method private...
def _warn(self, msg):
    """Record a "warning" level issue.

    ``msg`` is written through ``ui.warn`` with a trailing newline and the
    warning counter is incremented.
    """
    self.ui.warn(msg + b"\n")
    self.warnings += 1
Durham Goode
verify: move warn() to a class level function...
r27446
verify: make `err` a private method...
def _err(self, linkrev, msg, filename=None):
    """Record an "error" level issue.

    - linkrev: changelog revision the problem is attributed to, or None
      when it is unknown (displayed as "?")
    - msg: description of the problem
    - filename: optional label, shown as a "<filename>@" prefix

    A non-None ``linkrev`` is remembered in ``self.badrevs``; the error
    counter is always incremented.
    """
    if linkrev is None:
        where = b'?'
    else:
        self.badrevs.add(linkrev)
        where = b"%d" % linkrev
    line = b"%s: %s" % (where, msg)
    if filename:
        line = b"%s@%s" % (filename, line)
    self.ui.warn(b" " + line + b"\n")
    self.errors += 1
Durham Goode
verify: move err() to be a class function...
r27447
verify: make the `exc` method private...
def _exc(self, linkrev, msg, inst, filename=None):
    """Record an exception raised during the verify process.

    The exception ``inst`` is converted to bytes, falling back to its byte
    repr when that conversion is empty, and reported through ``_err()``
    as "<msg>: <exception>".
    """
    detail = stringutil.forcebytestr(inst)
    if not detail:
        detail = pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, detail), filename)
Durham Goode
verify: move exc() function onto class...
r27448
verify: rename the `checklog` to `_checkrevlog`...
def _checkrevlog(self, obj, name, linkrev):
    """Verify high level properties of a revlog.

    - revlog is present,
    - revlog is non-empty,
    - sizes (index and data) are correct,
    - revlog's format version is correct.

    ``name`` labels the revlog in messages; ``linkrev`` is only used to
    attribute the "empty or missing" error.
    """
    if not len(obj) and (self.havecl or self.havemf):
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    # checksize() result: [0] is the data discrepancy, [1] the index one
    sizes = obj.checksize()
    if sizes[0]:
        self._err(None, _(b"data length off by %d bytes") % sizes[0], name)
    if sizes[1]:
        self._err(None, _(b"index contains %d extra bytes") % sizes[1], name)

    # warn when this revlog's format disagrees with the changelog's
    if obj._format_version != revlog.REVLOGV0:
        if not self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 1") % name)
    elif self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 0") % name)
Durham Goode
verify: move checklog() onto class...
r27642
verify: make `checkentry` a private method...
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """Verify a single revlog entry.

    Arguments are:
    - obj:      the source revlog
    - i:        the revision number
    - node:     the revision node id
    - seen:     nodes previously seen for this revlog
                ({node -> revision number}, updated in place)
    - linkrevs: [changelog-revisions] introducing "node"
    - f:        string label ("changelog", "manifest", or filename)

    Performs the following checks:
    - linkrev points to an existing changelog revision,
    - linkrev points to a changelog revision that introduces this revision,
    - both parents exist in the revlog,
    - the revision is not duplicated.

    Return the linkrev of the revision, or None when the stored linkrev
    failed the checks above and therefore cannot be trusted.
    """
    lr = obj.linkrev(obj.rev(node))
    if lr < 0 or (self.havecl and lr not in linkrevs):
        if lr < 0 or lr >= len(self.repo.changelog):
            msg = _(b"rev %d points to nonexistent changeset %d")
        else:
            msg = _(b"rev %d points to unexpected changeset %d")
        self._err(None, msg % (i, lr), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs
                    #
                    # Build the filtered list separately: resetting
                    # `linkrevs` before iterating over it would always
                    # yield an empty result.
                    linkrevs = [
                        candidate
                        for candidate in linkrevs
                        if self.lrugetctx(candidate)[f].filenode() == node
                    ]
                except Exception:
                    # best effort only: keep the unfiltered candidates
                    pass
            msg = _(b" (expected %s)")
            msg %= b" ".join(map(pycompat.bytestr, linkrevs))
            self._warn(msg)
        lr = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 not in seen and p1 != self.repo.nullid:
            msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
            self._err(lr, msg, f)
        if p2 not in seen and p2 != self.repo.nullid:
            msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
            self._err(lr, msg, f)
    except Exception as inst:
        self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

    if node in seen:
        self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
    seen[node] = i
    return lr
Durham Goode
verify: move widely used variables into class members...
def verify(self):
    """Verify the content of the Mercurial repository.

    This method runs all verifications, displaying issues as they are
    found.

    Return 1 if any error has been encountered, 0 otherwise.  Raises
    ``error.Abort`` when the repository URL is not a ``file:`` one.
    """
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui

    if not repo.url().startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    if transaction.has_abandoned_transaction(repo):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        ui.status(
            _(b"repository uses revlog format %d\n")
            % (1 if self.revlogv1 else 0)
        )

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # the dirstate is only checked when the store itself looked sane
    if self.errors:
        ui.warn(_(b"not checking dirstate because of previous errors\n"))
        dirstate_errors = 0
    else:
        dirstate_errors = self._verify_dirstate()

    # final report
    ui.status(
        _(b"checked %d changesets with %d changes to %d files\n")
        % (len(repo.changelog), filerevisions, totalfiles)
    )
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(HINT_FNCACHE)
    if self.errors:
        ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
        if self.badrevs:
            msg = _(b"(first damaged changeset appears to be %d)\n")
            msg %= min(self.badrevs)
            ui.warn(msg)
        if dirstate_errors:
            ui.warn(
                _(b"dirstate inconsistent with current parent's manifest\n")
            )
            ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
        return 1
    return 0
Durham Goode
verify: move changelog verificaiton to its own function...
r27647
Martin von Zweigbergk
verify: replace "output parameters" by return values...
def _verifychangelog(self):
    """Verify the changelog of a repository.

    The following checks are performed:
    - all of `_checkrevlog` checks,
    - all of `_checkentry` checks (for each revision),
    - each revision can be read.

    The function returns some of the data observed in the changesets as a
    (mflinkrevs, filelinkrevs) tuple:
    - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
    - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

    Only files accepted by ``self.match`` are recorded in filelinkrevs.
    """
    ui = self.ui
    repo = self.repo
    match = self.match
    cl = repo.changelog

    ui.status(_(b"checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self._checkrevlog(cl, b"changelog", 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'changesets'), total=len(repo)
    )
    # keep the changelog open for reading during the whole pass
    with cl.reading():
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                # changes[0] is used as the manifest node and changes[3]
                # as the list of touched files
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may reference a manifest, so the
                # manifest must still be checked
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
Matt Mackall
Move repo.verify
r2778
Augie Fackler
formatting: blacken the codebase...
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """Verify the manifestlog content.

    Inputs:
    - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping;
                      entries are deleted from it as the corresponding
                      manifest revisions are found
    - dir:            a subdirectory to check (for tree manifest repo)
    - storefiles:     set of currently "orphan" files.
    - subdirprogress: a progress object

    This function checks:
    * all of `_checkrevlog` checks (for all manifest related revlogs)
    * all of `_checkentry` checks (for all manifest related revisions)
    * nodes for subdirectories exist in the sub-directory manifest
    * each manifest entry has a file path
    * each manifest node referred to in mflinkrevs exists in the manifest
      log

    Only the sub-directories and files accepted by ``self.match`` are
    verified and reported.

    Return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

    This mapping primarily contains entries for every file in the
    repository.  In addition, when tree-manifest is used, it also contains
    the entries collected from sub-directory manifests.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)

    if not dir:
        self.ui.status(_(b"checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    label = b"manifest"
    if dir:
        label = dir
        # this directory's revlog files are accounted for, hence not orphans
        revlogfiles = mf.files()
        storefiles.difference_update(revlogfiles)
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(mf._revlog, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(mf)
    )
    for i in mf:
        if not dir:
            progress.update(i)
        n = mf.node(i)
        lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
        if n in mflinkrevs:
            del mflinkrevs[n]
        elif dir:
            msg = _(b"%s not in parent-directory manifest") % short(n)
            self._err(lr, msg, label)
        else:
            self._err(lr, _(b"%s not in changesets") % short(n), label)

        try:
            mfdelta = mfl.get(dir, n).read_delta_new_entries(shallow=True)
            for f, fn, fl in mfdelta.iterentries():
                if not f:
                    self._err(lr, _(b"entry without name in manifest"))
                elif f == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(f)
                if fl == b't':
                    # flag b't': the entry is collected as a sub-directory
                    # manifest node
                    if not match.visitdir(fullpath):
                        continue
                    sdn = subdirnodes.setdefault(fullpath + b'/', {})
                    sdn.setdefault(fn, []).append(lr)
                else:
                    if not match(fullpath):
                        continue
                    filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
        except Exception as inst:
            self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                # Only the side effects of reading matter; the result is
                # not used.
                mfl.get(dir, n).read()
            except Exception as inst:
                msg = _(b"reading full manifest %s") % short(n)
                self._exc(lr, msg, inst, label)

    if not dir:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
        for c, m in sorted(changesetpairs):
            if dir:
                self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
            else:
                msg = _(b"changeset refers to unknown revision %s")
                msg %= short(m)
                self._err(c, msg, label)

    if not dir and subdirnodes:
        self.ui.status(_(b"checking directory manifests\n"))
        # start from every candidate 'meta/' store file; the recursive
        # calls below remove the ones that belong to a visited directory
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        undecodable = []
        for entry in repo.store.data_entries(undecodable=undecodable):
            for file_ in entry.files():
                f = file_.unencoded_path
                size = file_.file_size(repo.store.vfs)
                if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
        for f in undecodable:
            self._err(None, _(b"cannot decode filename '%s'") % f)
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

    for subdir, linkrevs in subdirnodes.items():
        subdirfilenodes = self._verifymanifest(
            linkrevs, subdir, storefiles, subdirprogress
        )
        for f, onefilenodes in subdirfilenodes.items():
            filenodes.setdefault(f, {}).update(onefilenodes)

    if not dir and subdirnodes:
        assert subdirprogress is not None  # help pytype
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

    return filenodes
Durham Goode
verify: move file cross checking to its own function...
r27645
Martin von Zweigbergk
verify: move cross-checking of changeset/manifest out of _crosscheckfiles()...
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """Cross-check the files seen in changesets against those in manifests.

    - filelinkrevs: a { file-path -> [changelog-rev] } mapping, as built
      by `_verifychangelog`
    - filenodes: a { file-path -> { filenode -> changelog-rev } } mapping,
      as built by `_verifymanifest`

    An error is recorded for every file present in one mapping but not in
    the other.  Nothing is returned.
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    total = len(filelinkrevs) + len(filenodes)
    progress = ui.makeprogress(
        _(b'crosschecking'), unit=_(b'files'), total=total
    )
    if self.havemf:
        for f in sorted(filelinkrevs):
            progress.increment()
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                self._err(lr, _(b"in changeset but not in manifest"), f)

    if self.havecl:
        for f in sorted(filenodes):
            progress.increment()
            if f not in filelinkrevs:
                try:
                    # attribute the error to the lowest linkrev recorded
                    # in the filelog for the nodes the manifests mention
                    fl = repo.file(f)
                    lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[f])
                except Exception:
                    # the filelog may be missing or unreadable
                    lr = None
                self._err(lr, _(b"in manifest but not in changeset"), f)

    progress.complete()
Henrik Stuart
verify: avoid exception on missing file revlog...
r8291
Durham Goode
verify: move filelog verification to its own function...
def _verifyfiles(self, filenodes, filelinkrevs):
    """Check every filelog named in ``filenodes`` or ``filelinkrevs``.

    ``filenodes`` is indexed by file name; each value is a mapping whose
    keys are used as file nodes and whose values are used as linkrevs.
    Entries are deleted from it as the matching filelog revisions are
    found, so the argument is mutated; whatever is left for a file is
    reported as "manifest refers to unknown revision".

    ``filelinkrevs`` is indexed by file name; each value is a sequence
    whose first item is used as the default linkrev for error reports.

    Problems are reported through ``self._err``, ``self._warn`` and
    ``self._exc``.  Returns a ``(number of files, number of revisions)``
    tuple, where revisions counts every filelog revision iterated over.
    """
    repo = self.repo
    ui = self.ui
    lrugetctx = self.lrugetctx
    revlogv1 = self.revlogv1
    havemf = self.havemf
    ui.status(_(b"checking files\n"))

    # Collect the normalized paths of the store's data files.  Each one is
    # removed again below when a filelog claims it; leftovers are orphans.
    storefiles = set()
    undecodable = []
    for entry in repo.store.data_entries(undecodable=undecodable):
        for file_ in entry.files():
            size = file_.file_size(repo.store.vfs)
            f = file_.unencoded_path
            # Zero-sized files are ignored when ``revlogv1`` is set.
            if (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))
    for f in undecodable:
        self._err(None, _(b"cannot decode filename '%s'") % f)

    # Shared dict handed to each filelog's ``verifyintegrity()``; the
    # ``skipread`` / ``safe_renamed`` keys are reset per file below.
    state = {
        # TODO this assumes revlog storage for changelog.
        b'expectedversion': self.repo.changelog._format_version,
        b'skipflags': self.skipflags,
        # experimental config: censor.policy
        b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
    }
    files = sorted(set(filenodes) | set(filelinkrevs))
    revisions = 0
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'files'), total=len(files)
    )
    for i, f in enumerate(files):
        progress.update(i, item=f)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        # Default linkrev used for errors on this file until a more
        # specific one is known.
        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.StorageError as e:
            self._err(lr, _(b"broken revlog! (%s)") % e, f)
            continue

        # Mark the files backing this filelog as accounted for.
        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                if self.warnorphanstorefiles:
                    msg = _(b" warning: revlog '%s' not in fncache!")
                    self._warn(msg % ff)
                    self.fncachewarned = True

        if not len(fl) and (self.havecl or self.havemf):
            self._err(lr, _(b"empty or missing %s") % f)
        else:
            # Guard against implementations not setting this.
            state[b'skipread'] = set()
            state[b'safe_renamed'] = set()

            for problem in fl.verifyintegrity(state):
                if problem.node is not None:
                    linkrev = fl.linkrev(fl.rev(problem.node))
                else:
                    linkrev = None

                if problem.warning:
                    self._warn(problem.warning)
                elif problem.error:
                    # Prefer the linkrev of the offending node, falling
                    # back to the file's default linkrev.
                    linkrev_msg = linkrev if linkrev is not None else lr
                    self._err(linkrev_msg, problem.error, f)
                else:
                    # NOTE(review): ``msg`` is not read anywhere else in
                    # this method (only ``warning``, ``error`` and
                    # ``node`` are) -- confirm the problem objects define
                    # it, otherwise this raises AttributeError instead.
                    raise error.ProgrammingError(
                        b'problem instance does not set warning or error '
                        b'attribute: %s' % problem.msg
                    )

        seen = {}
        # Note: this rebinds ``i`` from the enclosing ``enumerate`` loop;
        # the outer value is only used by ``progress.update`` above, before
        # this point, and is rebound on the next outer iteration.
        for i in fl:
            revisions += 1
            n = fl.node(i)
            # From here on ``lr`` is the value returned for this revision.
            lr = self._checkentry(fl, i, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                else:
                    # Seen in the filelog: drop it so that only unknown
                    # nodes remain for the cross-check below.
                    del filenodes[f][n]

            # Skip the rename check for nodes flagged ``skipread`` unless
            # they were also flagged ``safe_renamed``.
            if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                continue

            # check renames
            try:
                # This requires resolving fulltext (at least on revlogs,
                # though not with LFS revisions). We may want
                # ``verifyintegrity()`` to pass a set of nodes with
                # rename metadata as an optimization.
                rp = fl.renamed(n)
                if rp:
                    # rp[0] is used as the copy source file name and rp[1]
                    # as the copy source node.
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        if not any(rp[0] in pctx for pctx in ctx.parents()):
                            self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        m = _(b"empty or missing copy source revlog %s:%s")
                        self._err(lr, m % (rp[0], short(rp[1])), f)
                    elif rp[1] == self.repo.nullid:
                        msg = WARN_NULLID_COPY_SOURCE
                        msg %= (f, lr, rp[0], short(rp[1]))
                        ui.note(msg)
                    else:
                        # Called for its side effect only: any exception
                        # it raises is reported by the handler below.
                        fl2.rev(rp[1])
            except Exception as inst:
                self._exc(
                    lr, _(b"checking rename of %s") % short(n), inst, f
                )

        # cross-check
        if f in filenodes:
            # Whatever is still listed was never found in the filelog;
            # report in (linkrev, node) order.
            fns = [(v, k) for k, v in filenodes[f].items()]
            for lr, node in sorted(fns):
                msg = _(b"manifest refers to unknown revision %s")
                self._err(lr, msg % short(node), f)
    progress.complete()

    if self.warnorphanstorefiles:
        # Store files that no filelog claimed.
        for f in sorted(storefiles):
            self._warn(_(b"warning: orphan data file '%s'") % f)

    return len(files), revisions
Raphaël Gomès
verify: also check dirstate...
r50721
def _verify_dirstate(self):
    """Verify the dirstate against the manifests of its parents.

    Every problem yielded by ``repo.dirstate.verify()`` is printed with
    ``ui.error`` and counted.  The count is added to ``self.errors`` and
    returned to the caller.
    """
    repo, ui = self.repo, self.ui
    ui.status(_(b"checking dirstate\n"))

    p1, p2 = repo.dirstate.parents()
    manifest1 = repo[p1].manifest()
    manifest2 = repo[p2].manifest()

    # Only narrow repositories restrict the check with a matcher.
    if requirements.NARROW_REQUIREMENT in repo.requirements:
        matcher = repo.narrowmatch()
    else:
        matcher = None

    found = 0
    for problem in repo.dirstate.verify(manifest1, manifest2, p1, matcher):
        ui.error(problem)
        found += 1

    if found:
        self.errors += found
    return found