typing: add an assertion to verify.py to appease pytype...
Matt Harbison
r47548:51378966 stable
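The change adds a single `assert` so that pytype can narrow `subdirprogress` from an Optional value to a concrete progress object before `.complete()` is called on it in the second `if not dir and subdirnodes:` block. A minimal, self-contained sketch of that narrowing pattern follows; the `Progress` class and `finish()` helper are illustrative stand-ins, not Mercurial APIs:

from typing import Optional


class Progress(object):
    """Illustrative stand-in for a ui progress object."""

    def complete(self):
        # type: () -> None
        pass


def finish(progress):
    # type: (Optional[Progress]) -> None
    # Without the assert, a checker such as pytype reports that
    # `progress` may be None at the call below, even though the
    # surrounding logic guarantees it was assigned on this path.
    assert progress is not None  # help pytype
    progress.complete()


finish(Progress())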
@@ -1,631 +1,632 @@
 # verify.py - repository integrity checking for Mercurial
 #
 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 import os

 from .i18n import _
 from .node import (
     nullid,
     short,
 )
 from .utils import (
     stringutil,
 )

 from . import (
     error,
     pycompat,
     revlog,
     util,
 )

 VERIFY_DEFAULT = 0
 VERIFY_FULL = 1


 def verify(repo, level=None):
     with repo.lock():
         v = verifier(repo, level)
         return v.verify()


 def _normpath(f):
     # under hg < 2.4, convert didn't sanitize paths properly, so a
     # converted repo may contain repeated slashes
     while b'//' in f:
         f = f.replace(b'//', b'/')
     return f


 class verifier(object):
     def __init__(self, repo, level=None):
         self.repo = repo.unfiltered()
         self.ui = repo.ui
         self.match = repo.narrowmatch()
         if level is None:
             level = VERIFY_DEFAULT
         self._level = level
         self.badrevs = set()
         self.errors = 0
         self.warnings = 0
         self.havecl = len(repo.changelog) > 0
         self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
         self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
         self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
         self.refersmf = False
         self.fncachewarned = False
         # developer config: verify.skipflags
         self.skipflags = repo.ui.configint(b'verify', b'skipflags')
         self.warnorphanstorefiles = True

     def _warn(self, msg):
         """record a "warning" level issue"""
         self.ui.warn(msg + b"\n")
         self.warnings += 1

     def _err(self, linkrev, msg, filename=None):
         """record a "error" level issue"""
         if linkrev is not None:
             self.badrevs.add(linkrev)
             linkrev = b"%d" % linkrev
         else:
             linkrev = b'?'
         msg = b"%s: %s" % (linkrev, msg)
         if filename:
             msg = b"%s@%s" % (filename, msg)
         self.ui.warn(b" " + msg + b"\n")
         self.errors += 1

     def _exc(self, linkrev, msg, inst, filename=None):
         """record exception raised during the verify process"""
         fmsg = stringutil.forcebytestr(inst)
         if not fmsg:
             fmsg = pycompat.byterepr(inst)
         self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

     def _checkrevlog(self, obj, name, linkrev):
         """verify high level property of a revlog

         - revlog is present,
         - revlog is non-empty,
         - sizes (index and data) are correct,
         - revlog's format version is correct.
         """
         if not len(obj) and (self.havecl or self.havemf):
             self._err(linkrev, _(b"empty or missing %s") % name)
             return

         d = obj.checksize()
         if d[0]:
             self._err(None, _(b"data length off by %d bytes") % d[0], name)
         if d[1]:
             self._err(None, _(b"index contains %d extra bytes") % d[1], name)

         if obj.version != revlog.REVLOGV0:
             if not self.revlogv1:
                 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
         elif self.revlogv1:
             self._warn(_(b"warning: `%s' uses revlog format 0") % name)

     def _checkentry(self, obj, i, node, seen, linkrevs, f):
         """verify a single revlog entry

         arguments are:
         - obj: the source revlog
         - i: the revision number
         - node: the revision node id
         - seen: nodes previously seen for this revlog
         - linkrevs: [changelog-revisions] introducing "node"
         - f: string label ("changelog", "manifest", or filename)

         Performs the following checks:
         - linkrev points to an existing changelog revision,
         - linkrev points to a changelog revision that introduces this revision,
         - linkrev points to the lowest of these changesets,
         - both parents exist in the revlog,
         - the revision is not duplicated.

         Return the linkrev of the revision (or None for changelog's revisions).
         """
         lr = obj.linkrev(obj.rev(node))
         if lr < 0 or (self.havecl and lr not in linkrevs):
             if lr < 0 or lr >= len(self.repo.changelog):
                 msg = _(b"rev %d points to nonexistent changeset %d")
             else:
                 msg = _(b"rev %d points to unexpected changeset %d")
             self._err(None, msg % (i, lr), f)
             if linkrevs:
                 if f and len(linkrevs) > 1:
                     try:
                         # attempt to filter down to real linkrevs
                         linkrevs = [
                             l
                             for l in linkrevs
                             if self.lrugetctx(l)[f].filenode() == node
                         ]
                     except Exception:
                         pass
                 self._warn(
                     _(b" (expected %s)")
                     % b" ".join(map(pycompat.bytestr, linkrevs))
                 )
             lr = None  # can't be trusted

         try:
             p1, p2 = obj.parents(node)
             if p1 not in seen and p1 != nullid:
                 self._err(
                     lr,
                     _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
                     f,
                 )
             if p2 not in seen and p2 != nullid:
                 self._err(
                     lr,
                     _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
                     f,
                 )
         except Exception as inst:
             self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

         if node in seen:
             self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
         seen[node] = i
         return lr

     def verify(self):
         """verify the content of the Mercurial repository

         This method run all verifications, displaying issues as they are found.

         return 1 if any error have been encountered, 0 otherwise."""
         # initial validation and generic report
         repo = self.repo
         ui = repo.ui
         if not repo.url().startswith(b'file:'):
             raise error.Abort(_(b"cannot verify bundle or remote repos"))

         if os.path.exists(repo.sjoin(b"journal")):
             ui.warn(_(b"abandoned transaction found - run hg recover\n"))

         if ui.verbose or not self.revlogv1:
             ui.status(
                 _(b"repository uses revlog format %d\n")
                 % (self.revlogv1 and 1 or 0)
             )

         # data verification
         mflinkrevs, filelinkrevs = self._verifychangelog()
         filenodes = self._verifymanifest(mflinkrevs)
         del mflinkrevs
         self._crosscheckfiles(filelinkrevs, filenodes)
         totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

         # final report
         ui.status(
             _(b"checked %d changesets with %d changes to %d files\n")
             % (len(repo.changelog), filerevisions, totalfiles)
         )
         if self.warnings:
             ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
         if self.fncachewarned:
             ui.warn(
                 _(
                     b'hint: run "hg debugrebuildfncache" to recover from '
                     b'corrupt fncache\n'
                 )
             )
         if self.errors:
             ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
             if self.badrevs:
                 ui.warn(
                     _(b"(first damaged changeset appears to be %d)\n")
                     % min(self.badrevs)
                 )
             return 1
         return 0

     def _verifychangelog(self):
         """verify the changelog of a repository

         The following checks are performed:
         - all of `_checkrevlog` checks,
         - all of `_checkentry` checks (for each revisions),
         - each revision can be read.

         The function returns some of the data observed in the changesets as a
         (mflinkrevs, filelinkrevs) tuples:
         - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
         - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

         If a matcher was specified, filelinkrevs will only contains matched
         files.
         """
         ui = self.ui
         repo = self.repo
         match = self.match
         cl = repo.changelog

         ui.status(_(b"checking changesets\n"))
         mflinkrevs = {}
         filelinkrevs = {}
         seen = {}
         self._checkrevlog(cl, b"changelog", 0)
         progress = ui.makeprogress(
             _(b'checking'), unit=_(b'changesets'), total=len(repo)
         )
         for i in repo:
             progress.update(i)
             n = cl.node(i)
             self._checkentry(cl, i, n, seen, [i], b"changelog")

             try:
                 changes = cl.read(n)
                 if changes[0] != nullid:
                     mflinkrevs.setdefault(changes[0], []).append(i)
                     self.refersmf = True
                 for f in changes[3]:
                     if match(f):
                         filelinkrevs.setdefault(_normpath(f), []).append(i)
             except Exception as inst:
                 self.refersmf = True
                 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
         progress.complete()
         return mflinkrevs, filelinkrevs

     def _verifymanifest(
         self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
     ):
         """verify the manifestlog content

         Inputs:
         - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
         - dir: a subdirectory to check (for tree manifest repo)
         - storefiles: set of currently "orphan" files.
         - subdirprogress: a progress object

         This function checks:
         * all of `_checkrevlog` checks (for all manifest related revlogs)
         * all of `_checkentry` checks (for all manifest related revisions)
         * nodes for subdirectory exists in the sub-directory manifest
         * each manifest entries have a file path
         * each manifest node refered in mflinkrevs exist in the manifest log

         If tree manifest is in use and a matchers is specified, only the
         sub-directories matching it will be verified.

         return a two level mapping:
             {"path" -> { filenode -> changelog-revision}}

         This mapping primarily contains entries for every files in the
         repository. In addition, when tree-manifest is used, it also contains
         sub-directory entries.

         If a matcher is provided, only matching paths will be included.
         """
         repo = self.repo
         ui = self.ui
         match = self.match
         mfl = self.repo.manifestlog
         mf = mfl.getstorage(dir)

         if not dir:
             self.ui.status(_(b"checking manifests\n"))

         filenodes = {}
         subdirnodes = {}
         seen = {}
         label = b"manifest"
         if dir:
             label = dir
             revlogfiles = mf.files()
             storefiles.difference_update(revlogfiles)
             if subdirprogress:  # should be true since we're in a subdirectory
                 subdirprogress.increment()
         if self.refersmf:
             # Do not check manifest if there are only changelog entries with
             # null manifests.
             self._checkrevlog(mf, label, 0)
         progress = ui.makeprogress(
             _(b'checking'), unit=_(b'manifests'), total=len(mf)
         )
         for i in mf:
             if not dir:
                 progress.update(i)
             n = mf.node(i)
             lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
             if n in mflinkrevs:
                 del mflinkrevs[n]
             elif dir:
                 self._err(
                     lr,
                     _(b"%s not in parent-directory manifest") % short(n),
                     label,
                 )
             else:
                 self._err(lr, _(b"%s not in changesets") % short(n), label)

             try:
                 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                 for f, fn, fl in mfdelta.iterentries():
                     if not f:
                         self._err(lr, _(b"entry without name in manifest"))
                     elif f == b"/dev/null":  # ignore this in very old repos
                         continue
                     fullpath = dir + _normpath(f)
                     if fl == b't':
                         if not match.visitdir(fullpath):
                             continue
                         subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                             fn, []
                         ).append(lr)
                     else:
                         if not match(fullpath):
                             continue
                         filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
             except Exception as inst:
                 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
             if self._level >= VERIFY_FULL:
                 try:
                     # Various issues can affect manifest. So we read each full
                     # text from storage. This triggers the checks from the core
                     # code (eg: hash verification, filename are ordered, etc.)
                     mfdelta = mfl.get(dir, n).read()
                 except Exception as inst:
                     self._exc(
                         lr,
                         _(b"reading full manifest %s") % short(n),
                         inst,
                         label,
                     )

         if not dir:
             progress.complete()

         if self.havemf:
             # since we delete entry in `mflinkrevs` during iteration, any
             # remaining entries are "missing". We need to issue errors for them.
             changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
             for c, m in sorted(changesetpairs):
                 if dir:
                     self._err(
                         c,
                         _(
                             b"parent-directory manifest refers to unknown"
                             b" revision %s"
                         )
                         % short(m),
                         label,
                     )
                 else:
                     self._err(
                         c,
                         _(b"changeset refers to unknown revision %s")
                         % short(m),
                         label,
                     )

         if not dir and subdirnodes:
             self.ui.status(_(b"checking directory manifests\n"))
             storefiles = set()
             subdirs = set()
             revlogv1 = self.revlogv1
             for f, f2, size in repo.store.datafiles():
                 if not f:
                     self._err(None, _(b"cannot decode filename '%s'") % f2)
                 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                     storefiles.add(_normpath(f))
                     subdirs.add(os.path.dirname(f))
             subdirprogress = ui.makeprogress(
                 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
             )

         for subdir, linkrevs in pycompat.iteritems(subdirnodes):
             subdirfilenodes = self._verifymanifest(
                 linkrevs, subdir, storefiles, subdirprogress
             )
             for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                 filenodes.setdefault(f, {}).update(onefilenodes)

         if not dir and subdirnodes:
+            assert subdirprogress is not None  # help pytype
             subdirprogress.complete()
             if self.warnorphanstorefiles:
                 for f in sorted(storefiles):
                     self._warn(_(b"warning: orphan data file '%s'") % f)

         return filenodes

     def _crosscheckfiles(self, filelinkrevs, filenodes):
         repo = self.repo
         ui = self.ui
         ui.status(_(b"crosschecking files in changesets and manifests\n"))

         total = len(filelinkrevs) + len(filenodes)
         progress = ui.makeprogress(
             _(b'crosschecking'), unit=_(b'files'), total=total
         )
         if self.havemf:
             for f in sorted(filelinkrevs):
                 progress.increment()
                 if f not in filenodes:
                     lr = filelinkrevs[f][0]
                     self._err(lr, _(b"in changeset but not in manifest"), f)

         if self.havecl:
             for f in sorted(filenodes):
                 progress.increment()
                 if f not in filelinkrevs:
                     try:
                         fl = repo.file(f)
                         lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                     except Exception:
                         lr = None
                     self._err(lr, _(b"in manifest but not in changeset"), f)

         progress.complete()

     def _verifyfiles(self, filenodes, filelinkrevs):
         repo = self.repo
         ui = self.ui
         lrugetctx = self.lrugetctx
         revlogv1 = self.revlogv1
         havemf = self.havemf
         ui.status(_(b"checking files\n"))

         storefiles = set()
         for f, f2, size in repo.store.datafiles():
             if not f:
                 self._err(None, _(b"cannot decode filename '%s'") % f2)
             elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                 storefiles.add(_normpath(f))

         state = {
             # TODO this assumes revlog storage for changelog.
             b'expectedversion': self.repo.changelog.version & 0xFFFF,
             b'skipflags': self.skipflags,
             # experimental config: censor.policy
             b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
         }

         files = sorted(set(filenodes) | set(filelinkrevs))
         revisions = 0
         progress = ui.makeprogress(
             _(b'checking'), unit=_(b'files'), total=len(files)
         )
         for i, f in enumerate(files):
             progress.update(i, item=f)
             try:
                 linkrevs = filelinkrevs[f]
             except KeyError:
                 # in manifest but not in changelog
                 linkrevs = []

             if linkrevs:
                 lr = linkrevs[0]
             else:
                 lr = None

             try:
                 fl = repo.file(f)
             except error.StorageError as e:
                 self._err(lr, _(b"broken revlog! (%s)") % e, f)
                 continue

             for ff in fl.files():
                 try:
                     storefiles.remove(ff)
                 except KeyError:
                     if self.warnorphanstorefiles:
                         self._warn(
                             _(b" warning: revlog '%s' not in fncache!") % ff
                         )
                         self.fncachewarned = True

             if not len(fl) and (self.havecl or self.havemf):
                 self._err(lr, _(b"empty or missing %s") % f)
             else:
                 # Guard against implementations not setting this.
                 state[b'skipread'] = set()
                 state[b'safe_renamed'] = set()

                 for problem in fl.verifyintegrity(state):
                     if problem.node is not None:
                         linkrev = fl.linkrev(fl.rev(problem.node))
                     else:
                         linkrev = None

                     if problem.warning:
                         self._warn(problem.warning)
                     elif problem.error:
                         self._err(
                             linkrev if linkrev is not None else lr,
                             problem.error,
                             f,
                         )
                     else:
                         raise error.ProgrammingError(
                             b'problem instance does not set warning or error '
                             b'attribute: %s' % problem.msg
                         )

             seen = {}
             for i in fl:
                 revisions += 1
                 n = fl.node(i)
                 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                 if f in filenodes:
                     if havemf and n not in filenodes[f]:
                         self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                     else:
                         del filenodes[f][n]

                 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                     continue

                 # check renames
                 try:
                     # This requires resolving fulltext (at least on revlogs,
                     # though not with LFS revisions). We may want
                     # ``verifyintegrity()`` to pass a set of nodes with
                     # rename metadata as an optimization.
                     rp = fl.renamed(n)
                     if rp:
                         if lr is not None and ui.verbose:
                             ctx = lrugetctx(lr)
                             if not any(rp[0] in pctx for pctx in ctx.parents()):
                                 self._warn(
                                     _(
                                         b"warning: copy source of '%s' not"
                                         b" in parents of %s"
                                     )
                                     % (f, ctx)
                                 )
                         fl2 = repo.file(rp[0])
                         if not len(fl2):
                             self._err(
                                 lr,
                                 _(
                                     b"empty or missing copy source revlog "
                                     b"%s:%s"
                                 )
                                 % (rp[0], short(rp[1])),
                                 f,
                             )
                         elif rp[1] == nullid:
                             ui.note(
                                 _(
                                     b"warning: %s@%s: copy source"
                                     b" revision is nullid %s:%s\n"
                                 )
                                 % (f, lr, rp[0], short(rp[1]))
                             )
                         else:
                             fl2.rev(rp[1])
                 except Exception as inst:
                     self._exc(
                         lr, _(b"checking rename of %s") % short(n), inst, f
                     )

             # cross-check
             if f in filenodes:
                 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                 for lr, node in sorted(fns):
                     self._err(
                         lr,
                         _(b"manifest refers to unknown revision %s")
                         % short(node),
                         f,
                     )
         progress.complete()

         if self.warnorphanstorefiles:
             for f in sorted(storefiles):
                 self._warn(_(b"warning: orphan data file '%s'") % f)

         return len(files), revisions
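For reference, the module-level `verify(repo, level=None)` shown at the top of the diff is the routine that backs `hg verify`: it takes the repository lock, runs every check, and returns 1 if integrity errors were found, 0 otherwise. A hedged sketch of driving it programmatically; the repository path is a placeholder and this assumes the standard `mercurial.hg.repository()` and `mercurial.ui.ui.load()` entry points:

from mercurial import hg, verify
from mercurial import ui as uimod

# Load the default ui configuration and open a local repository.
# The path below is a placeholder.
u = uimod.ui.load()
repo = hg.repository(u, b'/path/to/repo')

# Runs the same checks as `hg verify`; returns 0 when no integrity
# errors were encountered, 1 otherwise.
rc = verify.verify(repo)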