##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48146:fde1df74 default
parent child Browse files
Show More
@@ -1,622 +1,619 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
# Verification levels. ``verifier`` falls back to VERIFY_DEFAULT when no level
# is given; at VERIFY_FULL (or above) it additionally reads every full
# manifest text from storage while checking manifests.
VERIFY_DEFAULT = 0
VERIFY_FULL = 1
25 25
26 26
def verify(repo, level=None):
    """Run an integrity check of ``repo`` while holding the repository lock.

    ``level`` is handed unchanged to ``verifier``; the value returned is
    whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        return verifier(repo, level).verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
class verifier(object):
    """Run the integrity checks of a repository and tally what they find.

    Errors and warnings are written to the ui as they are discovered and
    counted in ``self.errors`` / ``self.warnings``; ``verify()`` is the
    entry point that chains all the individual checks.
    """

    def __init__(self, repo, level=None):
        """Prepare the verification state for ``repo``.

        - repo: the repository to check (its unfiltered view is used),
        - level: VERIFY_DEFAULT or VERIFY_FULL (None means VERIFY_DEFAULT).
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        # changelog revisions associated with at least one error
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        # set once a changeset refers to a non-null manifest (or cannot be
        # read at all), see `_verifychangelog`
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue

        - linkrev: changelog revision to blame, or None when unknown,
        - msg: the message to display,
        - filename: optional label prefixed to the message.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            # fall back to the repr when the exception carries no message
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.

        arguments are:
        - obj: the revlog to check
        - name: label used in the messages
        - linkrev: changelog revision blamed if the revlog is empty/missing
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        # warn when this revlog's format differs from the changelog's one
        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        #
                        # The candidates are only replaced once the whole
                        # filtering succeeded, and the loop variable must
                        # not reuse `lr`.
                        linkrevs = [
                            rev
                            for rev in linkrevs
                            if self.lrugetctx(rev)[f].filenode() == node
                        ]
                    except Exception:
                        # best effort only: keep the unfiltered candidates
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method run all verifications, displaying issues as they are found.

        return 1 if any error have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (1 if self.revlogv1 else 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(
                _(
                    b'hint: run "hg debugrebuildfncache" to recover from '
                    b'corrupt fncache\n'
                )
            )
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(
                    _(b"(first damaged changeset appears to be %d)\n")
                    % min(self.badrevs)
                )
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                # changes[0] is used as the manifest node, changes[3] as the
                # list of touched files
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node referred in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            # files backing this directory manifest are not orphans
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(
                    lr,
                    _(b"%s not in parent-directory manifest") % short(n),
                    label,
                )
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        # sub-directory entry, verified recursively below
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                            fn, []
                        ).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    self._exc(
                        lr,
                        _(b"reading full manifest %s") % short(n),
                        inst,
                        label,
                    )

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(
                        c,
                        _(
                            b"parent-directory manifest refers to unknown"
                            b" revision %s"
                        )
                        % short(m),
                        label,
                    )
                else:
                    self._err(
                        c,
                        _(b"changeset refers to unknown revision %s")
                        % short(m),
                        label,
                    )

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            # start from every `meta/` file of the store; the recursive calls
            # below remove the ones owned by a directory manifest
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for rl_type, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """check that changesets and manifests agree on the set of files

        Inputs:
        - filelinkrevs: a {file-path -> [changelog-rev]} mapping
        - filenodes:    a {file-path -> {filenode -> changelog-rev}} mapping

        An error is recorded for each file present in only one of the two
        mappings. The first half is skipped when there is no manifest, the
        second half when there is no changelog.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[f])
                    except Exception:
                        # cannot find a revision to blame, report without one
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """verify the filelogs of every file known to changesets or manifests

        Inputs:
        - filenodes:    a {file-path -> {filenode -> changelog-rev}} mapping,
                        entries are removed from it as revisions are found
        - filelinkrevs: a {file-path -> [changelog-rev]} mapping

        For each file this runs the storage `verifyintegrity` checks, all of
        `_checkentry` checks for each revision, checks the copy source of
        renamed revisions and reports manifest entries without a matching
        filelog revision. Store files under `data/` that no filelog claims
        are reported as orphans.

        return a (number of files, number of file revisions) tuple.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(
                            _(b" warning: revlog '%s' not in fncache!") % ff
                        )
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self._checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            # `lr` is an int or None here: bytes %-formatting
                            # rejects both for `%s`, so convert explicitly
                            # instead of letting a TypeError be reported as
                            # an integrity error by the handler below.
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (
                                    f,
                                    pycompat.bytestr(lr),
                                    rp[0],
                                    short(rp[1]),
                                )
                            )
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                # whatever is left was never found in the filelog
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now