##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48152:9823b348 default
parent child Browse files
Show More
@@ -1,610 +1,606 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
# Verification levels accepted by verify() and the verifier class.
# VERIFY_DEFAULT is the level substituted when the caller passes None.
VERIFY_DEFAULT = 0
# At VERIFY_FULL (or above) every manifest full text is additionally read back
# from storage while the manifests are checked.
VERIFY_FULL = 1
25 25
26 26
def verify(repo, level=None):
    """Run a full verification of ``repo`` while holding its lock.

    ``level`` is forwarded untouched to the ``verifier`` constructor and the
    value produced by ``verifier.verify()`` is handed back to the caller.
    """
    with repo.lock():
        return verifier(repo, level).verify()
31 31
32 32
def _normpath(f):
    """Return ``f`` with every run of slashes collapsed into a single one.

    Under hg < 2.4, convert didn't sanitize paths properly, so a converted
    repo may contain repeated slashes.
    """
    doubled = b'//'
    while doubled in f:
        # splitting on the doubled slash and re-joining with a single one is
        # the same substitution as a replace()
        f = b'/'.join(f.split(doubled))
    return f
39 39
40 40
# Hint printed by verifier.verify() in its final report when at least one
# revlog was found missing from the fncache during the file check.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

# Message template (takes a short node) reported through verifier._err() when
# a sub-directory manifest lacks a revision its parent directory refers to.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)
48
45 49
46 50 class verifier(object):
def __init__(self, repo, level=None):
    """Prepare the state shared by every verification step.

    - repo: the repository to check (its unfiltered view is kept on self.repo)
    - level: the verification level, ``None`` selecting VERIFY_DEFAULT
    """
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    self._level = VERIFY_DEFAULT if level is None else level

    # bookkeeping for the issues found so far
    self.badrevs = set()
    self.errors = 0
    self.warnings = 0

    # what the repository actually contains
    changelog = repo.changelog
    rootmanifest = repo.manifestlog.getstorage(b'')
    self.havecl = len(changelog) > 0
    self.havemf = len(rootmanifest) > 0
    self.revlogv1 = changelog._format_version != revlog.REVLOGV0

    self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
    self.refersmf = False
    self.fncachewarned = False
    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
    self.warnorphanstorefiles = True
65 69 self.warnorphanstorefiles = True
66 70
def _warn(self, msg):
    """record a "warning" level issue

    The message is sent to the ui with a trailing newline and the warning
    counter is bumped.
    """
    line = msg + b"\n"
    self.ui.warn(line)
    self.warnings = self.warnings + 1
71 75
def _err(self, linkrev, msg, filename=None):
    """record a "error" level issue

    - linkrev: changelog revision to blame, or None when unknown (shown as ?)
    - msg: the description of the problem
    - filename: optional label prepended to the message as "label@"

    A non-None linkrev is also remembered in ``self.badrevs``.
    """
    if linkrev is None:
        where = b'?'
    else:
        self.badrevs.add(linkrev)
        where = b"%d" % linkrev
    text = b"%s: %s" % (where, msg)
    if filename:
        text = b"%s@%s" % (filename, text)
    self.ui.warn(b" " + text + b"\n")
    self.errors += 1
84 88
def _exc(self, linkrev, msg, inst, filename=None):
    """record exception raised during the verify process

    The exception ``inst`` is rendered as bytes (falling back to its repr
    when that rendering is empty) and reported through ``_err``.
    """
    detail = stringutil.forcebytestr(inst) or pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, detail), filename)
91 95
def _checkrevlog(self, obj, name, linkrev):
    """verify high level property of a revlog

    - revlog is present,
    - revlog is non-empty,
    - sizes (index and data) are correct,
    - revlog's format version is correct.

    ``name`` is the label used in the messages; ``linkrev`` is only used
    when reporting an empty or missing revlog.
    """
    expectcontent = self.havecl or self.havemf
    if len(obj) == 0 and expectcontent:
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    sizes = obj.checksize()
    dataoff = sizes[0]
    indexextra = sizes[1]
    if dataoff:
        self._err(None, _(b"data length off by %d bytes") % dataoff, name)
    if indexextra:
        self._err(None, _(b"index contains %d extra bytes") % indexextra, name)

    # warn when this revlog's format disagrees with the changelog's
    isv0 = obj._format_version == revlog.REVLOGV0
    if isv0:
        if self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)
    elif not self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 1") % name)
115 119
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """verify a single revlog entry

    arguments are:
    - obj:      the source revlog
    - i:        the revision number
    - node:     the revision node id
    - seen:     nodes previously seen for this revlog
    - linkrevs: [changelog-revisions] introducing "node"
    - f:        string label ("changelog", "manifest", or filename)

    Performs the following checks:
    - linkrev points to an existing changelog revision,
    - linkrev points to a changelog revision that introduces this revision,
    - linkrev points to the lowest of these changesets,
    - both parents exist in the revlog,
    - the revision is not duplicated.

    Return the linkrev of the revision (or None for changelog's revisions).
    None is also returned when the stored linkrev cannot be trusted.
    """
    lr = obj.linkrev(obj.rev(node))
    if lr < 0 or (self.havecl and lr not in linkrevs):
        if lr < 0 or lr >= len(self.repo.changelog):
            msg = _(b"rev %d points to nonexistent changeset %d")
        else:
            msg = _(b"rev %d points to unexpected changeset %d")
        self._err(None, msg % (i, lr), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs
                    #
                    # The filtered list is built from the *original*
                    # candidates and only replaces them once every lookup
                    # succeeded, so a failure keeps the unfiltered list.
                    linkrevs = [
                        rev
                        for rev in linkrevs
                        if self.lrugetctx(rev)[f].filenode() == node
                    ]
                except Exception:
                    # best effort only: the hint below then lists every
                    # candidate instead of the filtered ones
                    pass
            msg = _(b" (expected %s)")
            msg %= b" ".join(map(pycompat.bytestr, linkrevs))
            self._warn(msg)
        lr = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 not in seen and p1 != self.repo.nullid:
            msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
            self._err(lr, msg, f)
        if p2 not in seen and p2 != self.repo.nullid:
            msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
            self._err(lr, msg, f)
    except Exception as inst:
        self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

    if node in seen:
        self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
    seen[node] = i
    return lr
173 177
def verify(self):
    """verify the content of the Mercurial repository

    Every verification step is run in turn (changelog, manifests,
    changeset/manifest cross-check, files) and issues are displayed as they
    are found.

    return 1 if any error have been encountered, 0 otherwise."""
    repo = self.repo
    ui = repo.ui

    # initial validation and generic report
    if not repo.url().startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    if os.path.exists(repo.sjoin(b"journal")):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        formatnumber = 1 if self.revlogv1 else 0
        ui.status(_(b"repository uses revlog format %d\n") % formatnumber)

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # final report
    summary = _(b"checked %d changesets with %d changes to %d files\n")
    ui.status(summary % (len(repo.changelog), filerevisions, totalfiles))
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(HINT_FNCACHE)
    if not self.errors:
        return 0

    ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
    if self.badrevs:
        firstbad = _(b"(first damaged changeset appears to be %d)\n")
        ui.warn(firstbad % min(self.badrevs))
    return 1
219 223
def _verifychangelog(self):
    """verify the changelog of a repository

    The following checks are performed:
    - all of `_checkrevlog` checks,
    - all of `_checkentry` checks (for each revisions),
    - each revision can be read.

    The data gathered while reading the changesets is returned as a
    (mflinkrevs, filelinkrevs) tuple:
    - mflinkrevs:   a { manifest-node -> [changelog-rev] } mapping
    - filelinkrevs: a { file-path -> [changelog-rev] } mapping

    Only the files accepted by the matcher end up in filelinkrevs.
    """
    ui, repo, accept = self.ui, self.repo, self.match
    changelog = repo.changelog

    ui.status(_(b"checking changesets\n"))
    manifestrevs = {}
    filerevs = {}
    known = {}
    self._checkrevlog(changelog, b"changelog", 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'changesets'), total=len(repo)
    )
    for rev in repo:
        progress.update(rev)
        node = changelog.node(rev)
        self._checkentry(changelog, rev, node, known, [rev], b"changelog")

        try:
            entry = changelog.read(node)
            mfnode = entry[0]
            if mfnode != self.repo.nullid:
                manifestrevs.setdefault(mfnode, []).append(rev)
                self.refersmf = True
            for path in entry[3]:
                if not accept(path):
                    continue
                filerevs.setdefault(_normpath(path), []).append(rev)
        except Exception as inst:
            # an unreadable changeset may well have referenced a manifest
            self.refersmf = True
            self._exc(rev, _(b"unpacking changeset %s") % short(node), inst)
    progress.complete()
    return manifestrevs, filerevs
267 271
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """verify the manifestlog content

    Inputs:
    - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
                      (entries are deleted from it as nodes are found)
    - dir:            a subdirectory to check (for tree manifest repo)
    - storefiles:     set of currently "orphan" files.
    - subdirprogress: a progress object

    This function checks:
    * all of `_checkrevlog` checks (for all manifest related revlogs)
    * all of `_checkentry` checks (for all manifest related revisions)
    * nodes for subdirectory exists in the sub-directory manifest
    * each manifest entries have a file path
    * each manifest node referred in mflinkrevs exist in the manifest log

    If tree manifest is in use and a matchers is specified, only the
    sub-directories matching it will be verified.

    return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

    This mapping primarily contains entries for every files in the
    repository. In addition, when tree-manifest is used, it also contains
    sub-directory entries.

    If a matcher is provided, only matching paths will be included.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)

    if not dir:
        self.ui.status(_(b"checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    label = b"manifest"
    if dir:
        label = dir
        # the files backing this directory manifest are accounted for
        revlogfiles = mf.files()
        storefiles.difference_update(revlogfiles)
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(mf._revlog, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(mf)
    )
    for i in mf:
        if not dir:
            progress.update(i)
        n = mf.node(i)
        lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
        if n in mflinkrevs:
            del mflinkrevs[n]
        elif dir:
            msg = _(b"%s not in parent-directory manifest") % short(n)
            self._err(lr, msg, label)
        else:
            self._err(lr, _(b"%s not in changesets") % short(n), label)

        try:
            mfdelta = mfl.get(dir, n).readdelta(shallow=True)
            for f, fn, fl in mfdelta.iterentries():
                if not f:
                    self._err(lr, _(b"entry without name in manifest"))
                elif f == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(f)
                if fl == b't':
                    # sub-directory entry: remember it for the recursion below
                    if not match.visitdir(fullpath):
                        continue
                    sdn = subdirnodes.setdefault(fullpath + b'/', {})
                    sdn.setdefault(fn, []).append(lr)
                else:
                    if not match(fullpath):
                        continue
                    filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
        except Exception as inst:
            self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                mfl.get(dir, n).read()
            except Exception as inst:
                msg = _(b"reading full manifest %s") % short(n)
                self._exc(lr, msg, inst, label)

    if not dir:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
        # In a sub-directory call the linkrevs come from `_checkentry` and
        # may be None; None cannot be ordered against integers on Python 3,
        # so sort the unknown linkrevs first explicitly.
        changesetpairs.sort(
            key=lambda pair: (pair[0] is not None, pair[0] or 0, pair[1])
        )
        for c, m in changesetpairs:
            if dir:
                self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
            else:
                self._err(
                    c,
                    _(b"changeset refers to unknown revision %s")
                    % short(m),
                    label,
                )

    if not dir and subdirnodes:
        self.ui.status(_(b"checking directory manifests\n"))
        # collect the store files living under meta/ so that the ones no
        # directory manifest claims can be reported once the recursion is done
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        for t, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                storefiles.add(_normpath(f))
                subdirs.add(os.path.dirname(f))
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

    for subdir, linkrevs in pycompat.iteritems(subdirnodes):
        subdirfilenodes = self._verifymanifest(
            linkrevs, subdir, storefiles, subdirprogress
        )
        for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
            filenodes.setdefault(f, {}).update(onefilenodes)

    if not dir and subdirnodes:
        assert subdirprogress is not None  # help pytype
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

    return filenodes
422 418
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """report files known to only one of the changelog and the manifests

    - filelinkrevs: a { file-path -> [changelog-rev] } mapping
    - filenodes:    a { file-path -> { filenode -> changelog-rev } } mapping

    The "in changeset" direction is skipped when there is no manifest, and
    the "in manifest" direction is skipped when there is no changelog.
    """
    repo, ui = self.repo, self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    progress = ui.makeprogress(
        _(b'crosschecking'),
        unit=_(b'files'),
        total=len(filelinkrevs) + len(filenodes),
    )

    if self.havemf:
        for path in sorted(filelinkrevs):
            progress.increment()
            if path in filenodes:
                continue
            firstrev = filelinkrevs[path][0]
            self._err(firstrev, _(b"in changeset but not in manifest"), path)

    if self.havecl:
        for path in sorted(filenodes):
            progress.increment()
            if path in filelinkrevs:
                continue
            try:
                store = repo.file(path)
                lowest = min(
                    store.linkrev(store.rev(n)) for n in filenodes[path]
                )
            except Exception:
                # no usable linkrev: report the error without one
                lowest = None
            self._err(lowest, _(b"in manifest but not in changeset"), path)

    progress.complete()
451 447
def _verifyfiles(self, filenodes, filelinkrevs):
    """verify the file revlogs of the repository

    Inputs:
    - filenodes:    a { file-path -> { filenode -> changelog-rev } } mapping
                    (entries are deleted from it as nodes are found)
    - filelinkrevs: a { file-path -> [changelog-rev] } mapping

    For every file present in either mapping this:
    * opens the file storage and accounts for its store files,
    * reports the problems yielded by the storage `verifyintegrity`,
    * runs `_checkentry` on each revision,
    * checks each revision is referenced by a manifest,
    * checks copy/rename metadata,
    * reports manifest entries pointing to revisions that do not exist.

    Store files under data/ that no file revlog claimed are reported as
    orphans when `warnorphanstorefiles` is set.

    Return a (number of files, number of file revisions) tuple.
    """
    repo = self.repo
    ui = self.ui
    lrugetctx = self.lrugetctx
    revlogv1 = self.revlogv1
    havemf = self.havemf
    ui.status(_(b"checking files\n"))

    storefiles = set()
    for rl_type, f, f2, size in repo.store.datafiles():
        if not f:
            self._err(None, _(b"cannot decode filename '%s'") % f2)
        elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
            storefiles.add(_normpath(f))

    state = {
        # TODO this assumes revlog storage for changelog.
        b'expectedversion': self.repo.changelog._format_version,
        b'skipflags': self.skipflags,
        # experimental config: censor.policy
        b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
    }

    files = sorted(set(filenodes) | set(filelinkrevs))
    revisions = 0
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'files'), total=len(files)
    )
    for i, f in enumerate(files):
        progress.update(i, item=f)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.StorageError as e:
            self._err(lr, _(b"broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                if self.warnorphanstorefiles:
                    self._warn(
                        _(b" warning: revlog '%s' not in fncache!") % ff
                    )
                    self.fncachewarned = True

        if not len(fl) and (self.havecl or self.havemf):
            self._err(lr, _(b"empty or missing %s") % f)
        else:
            # Guard against implementations not setting this.
            state[b'skipread'] = set()
            state[b'safe_renamed'] = set()

            for problem in fl.verifyintegrity(state):
                if problem.node is not None:
                    linkrev = fl.linkrev(fl.rev(problem.node))
                else:
                    linkrev = None

                if problem.warning:
                    self._warn(problem.warning)
                elif problem.error:
                    self._err(
                        linkrev if linkrev is not None else lr,
                        problem.error,
                        f,
                    )
                else:
                    raise error.ProgrammingError(
                        b'problem instance does not set warning or error '
                        b'attribute: %s' % problem.msg
                    )

        seen = {}
        for rev in fl:
            revisions += 1
            n = fl.node(rev)
            lr = self._checkentry(fl, rev, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                else:
                    del filenodes[f][n]

            if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                continue

            # check renames
            try:
                # This requires resolving fulltext (at least on revlogs,
                # though not with LFS revisions). We may want
                # ``verifyintegrity()`` to pass a set of nodes with
                # rename metadata as an optimization.
                rp = fl.renamed(n)
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        if not any(rp[0] in pctx for pctx in ctx.parents()):
                            self._warn(
                                _(
                                    b"warning: copy source of '%s' not"
                                    b" in parents of %s"
                                )
                                % (f, ctx)
                            )
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        self._err(
                            lr,
                            _(
                                b"empty or missing copy source revlog "
                                b"%s:%s"
                            )
                            % (rp[0], short(rp[1])),
                            f,
                        )
                    elif rp[1] == self.repo.nullid:
                        # `lr` is an integer (or None): bytes %-formatting
                        # only accepts bytes-like values for %s, so render
                        # it explicitly. Otherwise this note would raise
                        # and be reported as an integrity error below.
                        ui.note(
                            _(
                                b"warning: %s@%s: copy source"
                                b" revision is nullid %s:%s\n"
                            )
                            % (f, pycompat.bytestr(lr), rp[0], short(rp[1]))
                        )
                    else:
                        # raises (and is reported below) when the copy
                        # source revision does not exist
                        fl2.rev(rp[1])
            except Exception as inst:
                self._exc(
                    lr, _(b"checking rename of %s") % short(n), inst, f
                )

        # cross-check
        if f in filenodes:
            # whatever is left was referenced by a manifest but never found
            fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
            # linkrevs recorded from the manifests may be None, which cannot
            # be ordered against integers on Python 3: sort them first
            fns.sort(
                key=lambda pair: (pair[0] is not None, pair[0] or 0, pair[1])
            )
            for lr, node in fns:
                self._err(
                    lr,
                    _(b"manifest refers to unknown revision %s")
                    % short(node),
                    f,
                )
    progress.complete()

    if self.warnorphanstorefiles:
        for f in sorted(storefiles):
            self._warn(_(b"warning: orphan data file '%s'") % f)

    return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now