verify.py
587 lines
| 21.7 KiB
| text/x-python
|
PythonLexer
/ mercurial / verify.py
Matt Mackall
|
r2778 | # verify.py - repository integrity checking for Mercurial | ||
# | ||||
Raphaël Gomès
|
r47575 | # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com> | ||
Matt Mackall
|
r2778 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Matt Mackall
|
r2778 | |||
Gregory Szorc
|
r25991 | |||
Bryan O'Sullivan
|
r17860 | import os | ||
Gregory Szorc
|
r25991 | |||
from .i18n import _ | ||||
Joerg Sonnenberger
|
r47771 | from .node import short | ||
from .utils import stringutil | ||||
Gregory Szorc
|
r25991 | |||
from . import ( | ||||
error, | ||||
Pulkit Goyal
|
r35603 | pycompat, | ||
Gregory Szorc
|
r25991 | revlog, | ||
util, | ||||
) | ||||
Matt Mackall
|
r2778 | |||
# Verification levels accepted by ``verify()`` and ``verifier``.
# VERIFY_DEFAULT is the level used when the caller passes ``level=None``.
VERIFY_DEFAULT = 0
# At VERIFY_FULL (or any higher level) ``verifier._verifymanifest`` also reads
# the full text of every manifest revision, not only its delta.
VERIFY_FULL = 1
r42331 | ||||
Augie Fackler
|
r43346 | |||
def verify(repo, level=None):
    """check the integrity of ``repo`` while holding the repository lock

    ``level`` is handed to ``verifier`` unchanged; ``None`` selects
    ``VERIFY_DEFAULT`` there. The result of ``verifier.verify()`` is returned
    as-is: 1 if any error was encountered, 0 otherwise.
    """
    with repo.lock():
        return verifier(repo, level).verify()
Matt Mackall
|
r4915 | |||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17860 | def _normpath(f): | ||
# under hg < 2.4, convert didn't sanitize paths properly, so a | ||||
# converted repo may contain repeated slashes | ||||
Augie Fackler
|
r43347 | while b'//' in f: | ||
f = f.replace(b'//', b'/') | ||||
Bryan O'Sullivan
|
r17860 | return f | ||
Augie Fackler
|
r43346 | |||
# Messages shared by the verifier methods below. Each one is wrapped in
# ``_()`` (imported from ``.i18n``) once, when the module is loaded.

# Printed at the end of ``verifier.verify()`` when ``fncachewarned`` is set.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)
# %s: short node of the sub-directory manifest revision that was not found.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)
# %s, %s: file path, then the changeset context the file revision links to.
WARN_UNKNOWN_COPY_SOURCE = _(
    b"warning: copy source of '%s' not in parents of %s"
)
# %s@%s: file path and linkrev; %s:%s: copy source path and its short node.
WARN_NULLID_COPY_SOURCE = _(
    b"warning: %s@%s: copy source revision is nullid %s:%s\n"
)
r48147 | ||||
Gregory Szorc
|
r49801 | class verifier: | ||
r42331 | def __init__(self, repo, level=None): | |||
Durham Goode
|
r27444 | self.repo = repo.unfiltered() | ||
self.ui = repo.ui | ||||
Martin von Zweigbergk
|
r39974 | self.match = repo.narrowmatch() | ||
r42331 | if level is None: | |||
level = VERIFY_DEFAULT | ||||
self._level = level | ||||
Durham Goode
|
r27444 | self.badrevs = set() | ||
Matt Mackall
|
r27453 | self.errors = 0 | ||
self.warnings = 0 | ||||
Durham Goode
|
r27444 | self.havecl = len(repo.changelog) > 0 | ||
Gregory Szorc
|
r39280 | self.havemf = len(repo.manifestlog.getstorage(b'')) > 0 | ||
r47910 | self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0 | |||
Matt Harbison
|
r44572 | self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__) | ||
Durham Goode
|
r27444 | self.refersmf = False | ||
Durham Goode
|
r27445 | self.fncachewarned = False | ||
Jun Wu
|
r32288 | # developer config: verify.skipflags | ||
Augie Fackler
|
r43347 | self.skipflags = repo.ui.configint(b'verify', b'skipflags') | ||
Gregory Szorc
|
r37435 | self.warnorphanstorefiles = True | ||
Durham Goode
|
r27444 | |||
r42028 | def _warn(self, msg): | |||
r42027 | """record a "warning" level issue""" | |||
Augie Fackler
|
r43347 | self.ui.warn(msg + b"\n") | ||
Matt Mackall
|
r27453 | self.warnings += 1 | ||
Durham Goode
|
r27446 | |||
r42030 | def _err(self, linkrev, msg, filename=None): | |||
r42029 | """record a "error" level issue""" | |||
Durham Goode
|
r27447 | if linkrev is not None: | ||
self.badrevs.add(linkrev) | ||||
Augie Fackler
|
r43347 | linkrev = b"%d" % linkrev | ||
Durham Goode
|
r27447 | else: | ||
Augie Fackler
|
r43347 | linkrev = b'?' | ||
msg = b"%s: %s" % (linkrev, msg) | ||||
Durham Goode
|
r27447 | if filename: | ||
Augie Fackler
|
r43347 | msg = b"%s@%s" % (filename, msg) | ||
self.ui.warn(b" " + msg + b"\n") | ||||
Matt Mackall
|
r27453 | self.errors += 1 | ||
Durham Goode
|
r27447 | |||
r42032 | def _exc(self, linkrev, msg, inst, filename=None): | |||
r42031 | """record exception raised during the verify process""" | |||
Matt Harbison
|
r47523 | fmsg = stringutil.forcebytestr(inst) | ||
Augie Fackler
|
r36595 | if not fmsg: | ||
fmsg = pycompat.byterepr(inst) | ||||
Augie Fackler
|
r43347 | self._err(linkrev, b"%s: %s" % (msg, fmsg), filename) | ||
Durham Goode
|
r27448 | |||
r42040 | def _checkrevlog(self, obj, name, linkrev): | |||
r42039 | """verify high level property of a revlog | |||
- revlog is present, | ||||
- revlog is non-empty, | ||||
- sizes (index and data) are correct, | ||||
- revlog's format version is correct. | ||||
""" | ||||
Durham Goode
|
r27642 | if not len(obj) and (self.havecl or self.havemf): | ||
Augie Fackler
|
r43347 | self._err(linkrev, _(b"empty or missing %s") % name) | ||
Durham Goode
|
r27642 | return | ||
d = obj.checksize() | ||||
if d[0]: | ||||
Augie Fackler
|
r43347 | self._err(None, _(b"data length off by %d bytes") % d[0], name) | ||
Durham Goode
|
r27642 | if d[1]: | ||
Augie Fackler
|
r43347 | self._err(None, _(b"index contains %d extra bytes") % d[1], name) | ||
Durham Goode
|
r27642 | |||
r47910 | if obj._format_version != revlog.REVLOGV0: | |||
Durham Goode
|
r27642 | if not self.revlogv1: | ||
Augie Fackler
|
r43347 | self._warn(_(b"warning: `%s' uses revlog format 1") % name) | ||
Durham Goode
|
r27642 | elif self.revlogv1: | ||
Augie Fackler
|
r43347 | self._warn(_(b"warning: `%s' uses revlog format 0") % name) | ||
Durham Goode
|
r27642 | |||
r42037 | def _checkentry(self, obj, i, node, seen, linkrevs, f): | |||
r42036 | """verify a single revlog entry | |||
arguments are: | ||||
- obj: the source revlog | ||||
- i: the revision number | ||||
r48142 | - node: the revision node id | |||
r42036 | - seen: nodes previously seen for this revlog | |||
- linkrevs: [changelog-revisions] introducing "node" | ||||
- f: string label ("changelog", "manifest", or filename) | ||||
Performs the following checks: | ||||
- linkrev points to an existing changelog revision, | ||||
- linkrev points to a changelog revision that introduces this revision, | ||||
- linkrev points to the lowest of these changesets, | ||||
- both parents exist in the revlog, | ||||
- the revision is not duplicated. | ||||
Return the linkrev of the revision (or None for changelog's revisions). | ||||
""" | ||||
Durham Goode
|
r27643 | lr = obj.linkrev(obj.rev(node)) | ||
if lr < 0 or (self.havecl and lr not in linkrevs): | ||||
if lr < 0 or lr >= len(self.repo.changelog): | ||||
Augie Fackler
|
r43347 | msg = _(b"rev %d points to nonexistent changeset %d") | ||
Durham Goode
|
r27643 | else: | ||
Augie Fackler
|
r43347 | msg = _(b"rev %d points to unexpected changeset %d") | ||
r42030 | self._err(None, msg % (i, lr), f) | |||
Durham Goode
|
r27643 | if linkrevs: | ||
if f and len(linkrevs) > 1: | ||||
try: | ||||
# attempt to filter down to real linkrevs | ||||
r48143 | linkrevs = [] | |||
for lr in linkrevs: | ||||
if self.lrugetctx(lr)[f].filenode() == node: | ||||
linkrevs.append(lr) | ||||
Durham Goode
|
r27643 | except Exception: | ||
pass | ||||
r48144 | msg = _(b" (expected %s)") | |||
msg %= b" ".join(map(pycompat.bytestr, linkrevs)) | ||||
self._warn(msg) | ||||
Augie Fackler
|
r43346 | lr = None # can't be trusted | ||
Durham Goode
|
r27643 | |||
try: | ||||
p1, p2 = obj.parents(node) | ||||
Joerg Sonnenberger
|
r47771 | if p1 not in seen and p1 != self.repo.nullid: | ||
r48145 | msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node)) | |||
self._err(lr, msg, f) | ||||
Joerg Sonnenberger
|
r47771 | if p2 not in seen and p2 != self.repo.nullid: | ||
r48146 | msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node)) | |||
self._err(lr, msg, f) | ||||
Durham Goode
|
r27643 | except Exception as inst: | ||
Augie Fackler
|
r43347 | self._exc(lr, _(b"checking parents of %s") % short(node), inst, f) | ||
Durham Goode
|
r27643 | |||
if node in seen: | ||||
Augie Fackler
|
r43347 | self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f) | ||
Durham Goode
|
r27643 | seen[node] = i | ||
return lr | ||||
Durham Goode
|
r27444 | def verify(self): | ||
r42033 | """verify the content of the Mercurial repository | |||
This method run all verifications, displaying issues as they are found. | ||||
r42034 | return 1 if any error have been encountered, 0 otherwise.""" | |||
r42035 | # initial validation and generic report | |||
Durham Goode
|
r27444 | repo = self.repo | ||
Durham Goode
|
r27443 | ui = repo.ui | ||
Augie Fackler
|
r43347 | if not repo.url().startswith(b'file:'): | ||
raise error.Abort(_(b"cannot verify bundle or remote repos")) | ||||
Matt Mackall
|
r6752 | |||
Augie Fackler
|
r43347 | if os.path.exists(repo.sjoin(b"journal")): | ||
ui.warn(_(b"abandoned transaction found - run hg recover\n")) | ||||
Durham Goode
|
r27443 | |||
Durham Goode
|
r27648 | if ui.verbose or not self.revlogv1: | ||
Augie Fackler
|
r43346 | ui.status( | ||
Augie Fackler
|
r43347 | _(b"repository uses revlog format %d\n") | ||
Augie Fackler
|
r43346 | % (self.revlogv1 and 1 or 0) | ||
) | ||||
Durham Goode
|
r27443 | |||
r42035 | # data verification | |||
Martin von Zweigbergk
|
r27695 | mflinkrevs, filelinkrevs = self._verifychangelog() | ||
filenodes = self._verifymanifest(mflinkrevs) | ||||
Martin von Zweigbergk
|
r28111 | del mflinkrevs | ||
self._crosscheckfiles(filelinkrevs, filenodes) | ||||
Durham Goode
|
r27647 | totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs) | ||
r42035 | # final report | |||
Augie Fackler
|
r43346 | ui.status( | ||
Augie Fackler
|
r43347 | _(b"checked %d changesets with %d changes to %d files\n") | ||
Augie Fackler
|
r43346 | % (len(repo.changelog), filerevisions, totalfiles) | ||
) | ||||
Durham Goode
|
r27647 | if self.warnings: | ||
Augie Fackler
|
r43347 | ui.warn(_(b"%d warnings encountered!\n") % self.warnings) | ||
Durham Goode
|
r27647 | if self.fncachewarned: | ||
r48147 | ui.warn(HINT_FNCACHE) | |||
Durham Goode
|
r27647 | if self.errors: | ||
Augie Fackler
|
r43347 | ui.warn(_(b"%d integrity errors encountered!\n") % self.errors) | ||
Durham Goode
|
r27648 | if self.badrevs: | ||
r48148 | msg = _(b"(first damaged changeset appears to be %d)\n") | |||
msg %= min(self.badrevs) | ||||
ui.warn(msg) | ||||
Durham Goode
|
r27647 | return 1 | ||
r42034 | return 0 | |||
Durham Goode
|
r27647 | |||
Martin von Zweigbergk
|
r27695 | def _verifychangelog(self): | ||
r42041 | """verify the changelog of a repository | |||
The following checks are performed: | ||||
- all of `_checkrevlog` checks, | ||||
- all of `_checkentry` checks (for each revisions), | ||||
- each revision can be read. | ||||
The function returns some of the data observed in the changesets as a | ||||
(mflinkrevs, filelinkrevs) tuples: | ||||
- mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping | ||||
- filelinkrevs: is a { file-path -> [changelog-rev] } mapping | ||||
If a matcher was specified, filelinkrevs will only contains matched | ||||
files. | ||||
""" | ||||
Durham Goode
|
r27647 | ui = self.ui | ||
repo = self.repo | ||||
Martin von Zweigbergk
|
r30866 | match = self.match | ||
Durham Goode
|
r27647 | cl = repo.changelog | ||
Augie Fackler
|
r43347 | ui.status(_(b"checking changesets\n")) | ||
Martin von Zweigbergk
|
r27695 | mflinkrevs = {} | ||
filelinkrevs = {} | ||||
Durham Goode
|
r27443 | seen = {} | ||
Augie Fackler
|
r43347 | self._checkrevlog(cl, b"changelog", 0) | ||
Augie Fackler
|
r43346 | progress = ui.makeprogress( | ||
Augie Fackler
|
r43347 | _(b'checking'), unit=_(b'changesets'), total=len(repo) | ||
Augie Fackler
|
r43346 | ) | ||
Durham Goode
|
r27443 | for i in repo: | ||
Martin von Zweigbergk
|
r38416 | progress.update(i) | ||
Durham Goode
|
r27443 | n = cl.node(i) | ||
Augie Fackler
|
r43347 | self._checkentry(cl, i, n, seen, [i], b"changelog") | ||
Matt Mackall
|
r2778 | |||
Durham Goode
|
r27443 | try: | ||
changes = cl.read(n) | ||||
Joerg Sonnenberger
|
r47771 | if changes[0] != self.repo.nullid: | ||
Durham Goode
|
r27443 | mflinkrevs.setdefault(changes[0], []).append(i) | ||
Durham Goode
|
r27444 | self.refersmf = True | ||
Durham Goode
|
r27443 | for f in changes[3]: | ||
Martin von Zweigbergk
|
r30866 | if match(f): | ||
Durham Goode
|
r27443 | filelinkrevs.setdefault(_normpath(f), []).append(i) | ||
except Exception as inst: | ||||
Durham Goode
|
r27444 | self.refersmf = True | ||
Augie Fackler
|
r43347 | self._exc(i, _(b"unpacking changeset %s") % short(n), inst) | ||
Martin von Zweigbergk
|
r38416 | progress.complete() | ||
Martin von Zweigbergk
|
r27695 | return mflinkrevs, filelinkrevs | ||
Matt Mackall
|
r2778 | |||
Augie Fackler
|
    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping;
                          entries are deleted from it as the matching
                          manifest revisions are found
        - dir:            a subdirectory to check (for tree manifest repo),
                          empty for the root manifest
        - storefiles:     set of currently "orphan" files; only used (and
                          updated in place) when ``dir`` is set
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entry has a file path
        * each manifest node referred to in mflinkrevs exists in the
          manifest log

        If tree manifest is in use and a matcher is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every file in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        # the headline is printed once, by the root (non-recursive) call
        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        # {sub-directory path with trailing slash -> {node -> [linkrevs]}},
        # filled from 't' flagged entries and walked recursively below
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            # the files backing this directory's storage are accounted for,
            # so they are no longer orphans
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            # per-revision progress is only reported for the root manifest;
            # sub-directories are counted through ``subdirprogress``
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                # found: whatever is left in mflinkrevs after the loop is
                # reported as missing further down
                del mflinkrevs[n]
            elif dir:
                msg = _(b"%s not in parent-directory manifest") % short(n)
                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        # sub-directory entry: remember which revisions
                        # referenced this sub-manifest node
                        if not match.visitdir(fullpath):
                            continue
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        # setdefault keeps the linkrev recorded first for
                        # this file node
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    msg = _(b"reading full manifest %s") % short(n)
                    self._exc(lr, msg, inst, label)

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
                else:
                    msg = _(b"changeset refers to unknown revision %s")
                    msg %= short(m)
                    self._err(c, msg, label)

        if not dir and subdirnodes:
            # root call of a tree-manifest repository: collect the store
            # files under ``meta/`` so the recursive calls can tick them off
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            undecodable = []
            for t, f, size in repo.store.datafiles(undecodable=undecodable):
                if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            for f in undecodable:
                self._err(None, _(b"cannot decode filename '%s'") % f)
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        # recurse into every sub-directory seen above and merge its result
        # into this level's mapping
        for subdir, linkrevs in subdirnodes.items():
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in subdirfilenodes.items():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            # anything still in storefiles was claimed by no directory
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes
Durham Goode
|
r27645 | |||
Martin von Zweigbergk
|
r28111 | def _crosscheckfiles(self, filelinkrevs, filenodes): | ||
Durham Goode
|
r27645 | repo = self.repo | ||
ui = self.ui | ||||
Augie Fackler
|
r43347 | ui.status(_(b"crosschecking files in changesets and manifests\n")) | ||
Matt Mackall
|
r2778 | |||
Martin von Zweigbergk
|
r28111 | total = len(filelinkrevs) + len(filenodes) | ||
Augie Fackler
|
r43346 | progress = ui.makeprogress( | ||
Augie Fackler
|
r43347 | _(b'crosschecking'), unit=_(b'files'), total=total | ||
Augie Fackler
|
r43346 | ) | ||
Durham Goode
|
r27645 | if self.havemf: | ||
Durham Goode
|
r27443 | for f in sorted(filelinkrevs): | ||
Martin von Zweigbergk
|
r38416 | progress.increment() | ||
Durham Goode
|
r27443 | if f not in filenodes: | ||
lr = filelinkrevs[f][0] | ||||
Augie Fackler
|
r43347 | self._err(lr, _(b"in changeset but not in manifest"), f) | ||
Adrian Buehlmann
|
r6892 | |||
Durham Goode
|
r27645 | if self.havecl: | ||
Durham Goode
|
r27443 | for f in sorted(filenodes): | ||
Martin von Zweigbergk
|
r38416 | progress.increment() | ||
Durham Goode
|
r27443 | if f not in filelinkrevs: | ||
try: | ||||
fl = repo.file(f) | ||||
lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]]) | ||||
except Exception: | ||||
lr = None | ||||
Augie Fackler
|
r43347 | self._err(lr, _(b"in manifest but not in changeset"), f) | ||
Durham Goode
|
r27443 | |||
Martin von Zweigbergk
|
r38416 | progress.complete() | ||
Henrik Stuart
|
r8291 | |||
Durham Goode
|
r27644 | def _verifyfiles(self, filenodes, filelinkrevs): | ||
repo = self.repo | ||||
ui = self.ui | ||||
lrugetctx = self.lrugetctx | ||||
revlogv1 = self.revlogv1 | ||||
havemf = self.havemf | ||||
Augie Fackler
|
r43347 | ui.status(_(b"checking files\n")) | ||
Henrik Stuart
|
r8291 | |||
Durham Goode
|
r27443 | storefiles = set() | ||
Valentin Gatien-Baron
|
r48691 | undecodable = [] | ||
for t, f, size in repo.store.datafiles(undecodable=undecodable): | ||||
if (size > 0 or not revlogv1) and f.startswith(b'data/'): | ||||
Durham Goode
|
r27443 | storefiles.add(_normpath(f)) | ||
Valentin Gatien-Baron
|
r48691 | for f in undecodable: | ||
self._err(None, _(b"cannot decode filename '%s'") % f) | ||||
Adrian Buehlmann
|
r6892 | |||
Gregory Szorc
|
r39878 | state = { | ||
Gregory Szorc
|
r39881 | # TODO this assumes revlog storage for changelog. | ||
r47910 | b'expectedversion': self.repo.changelog._format_version, | |||
Augie Fackler
|
r43347 | b'skipflags': self.skipflags, | ||
Gregory Szorc
|
r39908 | # experimental config: censor.policy | ||
Augie Fackler
|
r43347 | b'erroroncensored': ui.config(b'censor', b'policy') == b'abort', | ||
Gregory Szorc
|
r39878 | } | ||
Durham Goode
|
r27443 | files = sorted(set(filenodes) | set(filelinkrevs)) | ||
Durham Goode
|
r27644 | revisions = 0 | ||
Augie Fackler
|
r43346 | progress = ui.makeprogress( | ||
Augie Fackler
|
r43347 | _(b'checking'), unit=_(b'files'), total=len(files) | ||
Augie Fackler
|
r43346 | ) | ||
Durham Goode
|
r27443 | for i, f in enumerate(files): | ||
Martin von Zweigbergk
|
r38416 | progress.update(i, item=f) | ||
Adrian Buehlmann
|
r6892 | try: | ||
Durham Goode
|
r27443 | linkrevs = filelinkrevs[f] | ||
Adrian Buehlmann
|
r6892 | except KeyError: | ||
Durham Goode
|
r27443 | # in manifest but not in changelog | ||
linkrevs = [] | ||||
Matt Mackall
|
r2778 | |||
Durham Goode
|
r27443 | if linkrevs: | ||
lr = linkrevs[0] | ||||
else: | ||||
lr = None | ||||
Matt Mackall
|
r2778 | |||
Matt Mackall
|
r3744 | try: | ||
Durham Goode
|
r27443 | fl = repo.file(f) | ||
Gregory Szorc
|
r39813 | except error.StorageError as e: | ||
Augie Fackler
|
r43347 | self._err(lr, _(b"broken revlog! (%s)") % e, f) | ||
Durham Goode
|
r27443 | continue | ||
for ff in fl.files(): | ||||
try: | ||||
storefiles.remove(ff) | ||||
except KeyError: | ||||
Gregory Szorc
|
r37435 | if self.warnorphanstorefiles: | ||
r48154 | msg = _(b" warning: revlog '%s' not in fncache!") | |||
self._warn(msg % ff) | ||||
Gregory Szorc
|
r37435 | self.fncachewarned = True | ||
Durham Goode
|
r27443 | |||
Gregory Szorc
|
r39878 | if not len(fl) and (self.havecl or self.havemf): | ||
Augie Fackler
|
r43347 | self._err(lr, _(b"empty or missing %s") % f) | ||
Gregory Szorc
|
r39878 | else: | ||
Gregory Szorc
|
r39908 | # Guard against implementations not setting this. | ||
Augie Fackler
|
r43347 | state[b'skipread'] = set() | ||
Matt Harbison
|
r44530 | state[b'safe_renamed'] = set() | ||
Gregory Szorc
|
r39878 | for problem in fl.verifyintegrity(state): | ||
Gregory Szorc
|
r39908 | if problem.node is not None: | ||
linkrev = fl.linkrev(fl.rev(problem.node)) | ||||
else: | ||||
linkrev = None | ||||
Gregory Szorc
|
r39878 | if problem.warning: | ||
r42028 | self._warn(problem.warning) | |||
Gregory Szorc
|
r39878 | elif problem.error: | ||
r48155 | linkrev_msg = linkrev if linkrev is not None else lr | |||
self._err(linkrev_msg, problem.error, f) | ||||
Gregory Szorc
|
r39878 | else: | ||
raise error.ProgrammingError( | ||||
Augie Fackler
|
r43347 | b'problem instance does not set warning or error ' | ||
b'attribute: %s' % problem.msg | ||||
Augie Fackler
|
r43346 | ) | ||
Gregory Szorc
|
r39878 | |||
Durham Goode
|
r27443 | seen = {} | ||
for i in fl: | ||||
revisions += 1 | ||||
n = fl.node(i) | ||||
r42037 | lr = self._checkentry(fl, i, n, seen, linkrevs, f) | |||
Durham Goode
|
r27443 | if f in filenodes: | ||
if havemf and n not in filenodes[f]: | ||||
Augie Fackler
|
r43347 | self._err(lr, _(b"%s not in manifests") % (short(n)), f) | ||
Patrick Mezard
|
r6534 | else: | ||
Durham Goode
|
r27443 | del filenodes[f][n] | ||
Matt Harbison
|
r44530 | if n in state[b'skipread'] and n not in state[b'safe_renamed']: | ||
Gregory Szorc
|
r39908 | continue | ||
Matt Mackall
|
r3744 | |||
Durham Goode
|
r27443 | # check renames | ||
try: | ||||
Matt Harbison
|
r44408 | # This requires resolving fulltext (at least on revlogs, | ||
# though not with LFS revisions). We may want | ||||
# ``verifyintegrity()`` to pass a set of nodes with | ||||
Gregory Szorc
|
r39908 | # rename metadata as an optimization. | ||
rp = fl.renamed(n) | ||||
Durham Goode
|
r27443 | if rp: | ||
if lr is not None and ui.verbose: | ||||
ctx = lrugetctx(lr) | ||||
Martin von Zweigbergk
|
r36357 | if not any(rp[0] in pctx for pctx in ctx.parents()): | ||
r48156 | self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx)) | |||
Durham Goode
|
r27443 | fl2 = repo.file(rp[0]) | ||
if not len(fl2): | ||||
r48157 | m = _(b"empty or missing copy source revlog %s:%s") | |||
self._err(lr, m % (rp[0], short(rp[1])), f) | ||||
Joerg Sonnenberger
|
r47771 | elif rp[1] == self.repo.nullid: | ||
r48158 | msg = WARN_NULLID_COPY_SOURCE | |||
msg %= (f, lr, rp[0], short(rp[1])) | ||||
ui.note(msg) | ||||
Durham Goode
|
r27443 | else: | ||
fl2.rev(rp[1]) | ||||
except Exception as inst: | ||||
Augie Fackler
|
r43346 | self._exc( | ||
Augie Fackler
|
r43347 | lr, _(b"checking rename of %s") % short(n), inst, f | ||
Augie Fackler
|
r43346 | ) | ||
Adrian Buehlmann
|
r6892 | |||
Durham Goode
|
r27443 | # cross-check | ||
if f in filenodes: | ||||
Gregory Szorc
|
r49786 | fns = [(v, k) for k, v in filenodes[f].items()] | ||
Durham Goode
|
r27443 | for lr, node in sorted(fns): | ||
r48161 | msg = _(b"manifest refers to unknown revision %s") | |||
self._err(lr, msg % short(node), f) | ||||
Martin von Zweigbergk
|
r38416 | progress.complete() | ||
Durham Goode
|
r27443 | |||
Gregory Szorc
|
r37435 | if self.warnorphanstorefiles: | ||
for f in sorted(storefiles): | ||||
Augie Fackler
|
r43347 | self._warn(_(b"warning: orphan data file '%s'") % f) | ||
Durham Goode
|
r27443 | |||
Durham Goode
|
r27644 | return len(files), revisions | ||