##// END OF EJS Templates
verify: small refactoring and documentation in `_verifymanifest`...
marmoute -
r42043:9c5a6af7 default
parent child Browse files
Show More
@@ -1,533 +1,535
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 util,
23 23 )
24 24
def verify(repo):
    """Run a full integrity check on ``repo`` while holding its lock.

    The result of ``verifier.verify()`` is returned unchanged.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
28 28
def _normpath(f):
    """Collapse every run of repeated slashes in ``f`` into a single slash.

    Under hg < 2.4, convert didn't sanitize paths properly, so a converted
    repo may contain repeated slashes.
    """
    collapsed = f.replace('//', '/')
    # keep replacing until a pass leaves the path unchanged
    while collapsed != f:
        f = collapsed
        collapsed = f.replace('//', '/')
    return f
35 35
class verifier(object):
    """Integrity checker for a local repository.

    An instance accumulates the number of warnings and errors found, as well
    as the set of changelog revisions that errors were attributed to. The
    entry point is `verify()`; the other methods each cover one stage of the
    verification (changelog, manifests, cross-check, files).
    """

    def __init__(self, repo):
        """Gather the repository state needed by the verification stages.

        The checks run against the unfiltered repository, while the narrow
        matcher of `repo` restricts which paths are verified.
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        # changelog revisions that at least one error was attributed to
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        # set once a changeset refers to a non-null manifest (or cannot be
        # read), see `_verifychangelog`
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue

        - linkrev: changelog revision the issue is attributed to, or None
          when unknown (displayed as "?"),
        - msg: the error message,
        - filename: optional label prepended to the message as "label@".
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = "%d" % linkrev
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            # fall back to the repr when the exception has no message
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        # size discrepancies are reported through `_err` so that they are
        # counted as integrity errors (there is no `err` method on this class)
        if d[0]:
            self._err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_("warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                self._warn(_(" (expected %s)") % " ".join
                           (map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self._err(lr, _("unknown parent 1 %s of %s") %
                          (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self._err(lr, _("unknown parent 2 %s of %s") %
                          (short(p2), short(node)), f)
        except Exception as inst:
            self._exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method runs all verifications, displaying issues as they are
        found.

        return 1 if any error has been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(_("checked %d changesets with %d changes to %d files\n") %
                  (len(repo.changelog), filerevisions, totalfiles))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, "changelog", 0)
        progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                                   total=len(repo))
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # the changeset is unreadable, so we cannot tell whether it
                # refers to a manifest: have the manifest checked anyway
                self.refersmf = True
                self._exc(i, _("unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node referred to in mflinkrevs exists in the manifest
          log

        If tree manifest is in use and a matcher is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            # files backing this sub-directory manifest are not orphans
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf, label, 0)
        progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                   total=len(mf))
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(lr, _("%s not in parent-directory manifest") %
                          short(n), label)
            else:
                self._err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        # sub-directory (tree manifest) entry
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(c, _("parent-directory manifest refers to unknown"
                                   " revision %s") % short(m), label)
                else:
                    self._err(c, _("changeset refers to unknown revision %s") %
                              short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                # whatever was not claimed by a sub-directory manifest
                for f in sorted(storefiles):
                    self._warn(_("warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """check that changesets and manifests mention the same files

        Inputs:
        - filelinkrevs: a { file-path -> [changelog-rev] } mapping
        - filenodes:    a {"path" -> { filenode -> changelog-revision}} mapping

        An error is recorded for each file present in `filelinkrevs` but not
        in `filenodes` (only when a manifest exists) and for each file present
        in `filenodes` but not in `filelinkrevs` (only when a changelog
        exists).
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
                                   total=total)
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # no usable linkrev: report against an unknown revision
                        lr = None
                    self._err(lr, _("in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """verify the file storage of every known file

        Inputs:
        - filenodes:    a {"path" -> { filenode -> changelog-revision}} mapping
        - filelinkrevs: a { file-path -> [changelog-rev] } mapping

        For each file mentioned in either mapping, this runs the storage's
        own `verifyintegrity` checks, all of `_checkentry` checks for each
        revision, checks rename metadata, and cross-checks the revisions found
        against `filenodes` (entries of `filenodes` are deleted as they are
        found). Store files under 'data/' that no file claimed are reported as
        orphans.

        return a (number-of-files, number-of-file-revisions) tuple.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            'expectedversion': self.repo.changelog.version & 0xFFFF,
            'skipflags': self.skipflags,
            # experimental config: censor.policy
            'erroroncensored': ui.config('censor', 'policy') == 'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(_('checking'), unit=_('files'),
                                   total=len(files))
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(_(" warning: revlog '%s' not in fncache!") %
                                   ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _("empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state['skipread'] = set()
                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(linkrev if linkrev is not None else lr,
                                  problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            'problem instance does not set warning or error '
                            'attribute: %s' % problem.msg)

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state['skipread']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs). We
                    # may want ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(_("warning: copy source of '%s' not"
                                             " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(lr,
                                      _("empty or missing copy source revlog "
                                        "%s:%s") % (rp[0],
                                                    short(rp[1])),
                                      f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(lr, _("checking rename of %s") % short(n),
                              inst, f)

            # cross-check
            if f in filenodes:
                # nodes still listed here were not found in the file storage
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self._err(lr, _("manifest refers to unknown revision %s") %
                              short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_("warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now