##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48144:0f4beb88 default
parent child Browse files
Show More
@@ -1,626 +1,625 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
# Verification levels accepted by verify() and the verifier class.
# VERIFY_DEFAULT is the level used when the caller passes level=None;
# at VERIFY_FULL (or above) every manifest full text is additionally read.
23 23 VERIFY_DEFAULT = 0
24 24 VERIFY_FULL = 1
25 25
26 26
def verify(repo, level=None):
    """Run a full integrity check of ``repo`` while holding its lock.

    ``level`` is forwarded unchanged to ``verifier``. The return value is
    whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        return verifier(repo, level).verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
class verifier(object):
    """Stateful runner for the repository integrity checks.

    An instance accumulates the number of errors and warnings reported, the
    set of changelog revisions found damaged, and a few repository-wide
    facts (presence of changelog/manifest data, revlog format) that the
    individual checks consult. ``verify()`` is the entry point.
    """

    def __init__(self, repo, level=None):
        """Prepare a verification run for ``repo``.

        ``level`` selects how thorough the run is; ``None`` means
        ``VERIFY_DEFAULT``.
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        # changelog revisions for which at least one error was recorded
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        # set once a changeset refers to a non-null manifest (or once a
        # changeset could not be read at all)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue

        ``linkrev`` is the changelog revision to blame, or None when it is
        unknown (displayed as "?"). ``filename``, when given, is prepended
        to the message.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            # fall back to the repr when the exception has no message
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        # warn when this revlog's format differs from the changelog's
        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        #
                        # The result is built in a separate list and only
                        # bound to `linkrevs` once every lookup succeeded,
                        # so a failure keeps the unfiltered candidates for
                        # the message below. (The candidates must be
                        # iterated *before* being replaced, otherwise the
                        # filter always yields an empty list.)
                        filtered = []
                        for candidate in linkrevs:
                            fctx = self.lrugetctx(candidate)[f]
                            if fctx.filenode() == node:
                                filtered.append(candidate)
                        linkrevs = filtered
                    except Exception:
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                self._err(
                    lr,
                    _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
                    f,
                )
            if p2 not in seen and p2 != self.repo.nullid:
                self._err(
                    lr,
                    _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
                    f,
                )
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method runs all verifications, displaying issues as they are
        found.

        Return 1 if any error has been encountered, 0 otherwise.

        Raises error.Abort if the repository url does not start with
        "file:"."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (1 if self.revlogv1 else 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(
                _(
                    b'hint: run "hg debugrebuildfncache" to recover from '
                    b'corrupt fncache\n'
                )
            )
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(
                    _(b"(first damaged changeset appears to be %d)\n")
                    % min(self.badrevs)
                )
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                # changes[0] is used as the manifest node and changes[3] as
                # the list of touched files
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may have referred to a manifest
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node referred in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.

        Note that entries are deleted from ``mflinkrevs`` as the matching
        manifest revisions are found.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            # files backing this directory manifest are not orphans
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(
                    lr,
                    _(b"%s not in parent-directory manifest") % short(n),
                    label,
                )
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        # sub-directory entry: remember which changelog
                        # revisions refer to that directory manifest node
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                            fn, []
                        ).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    self._exc(
                        lr,
                        _(b"reading full manifest %s") % short(n),
                        inst,
                        label,
                    )

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(
                        c,
                        _(
                            b"parent-directory manifest refers to unknown"
                            b" revision %s"
                        )
                        % short(m),
                        label,
                    )
                else:
                    self._err(
                        c,
                        _(b"changeset refers to unknown revision %s")
                        % short(m),
                        label,
                    )

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            # collect the store files under meta/; the recursive calls below
            # remove the ones that belong to a known directory manifest
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """report files known to only one of the changelog and the manifests

        - filelinkrevs: a { file-path -> [changelog-rev] } mapping
        - filenodes:    a { file-path -> { filenode -> changelog-rev } }
                        mapping

        A file present in ``filelinkrevs`` but not in ``filenodes`` is an
        error (only checked when there is manifest data), and so is the
        reverse (only checked when there is changelog data).
        """
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        # blame the lowest linkrev stored in the filelog
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """verify every file storage referenced by manifests or changesets

        - filenodes:    a { file-path -> { filenode -> changelog-rev } }
                        mapping; entries are deleted from it as the
                        matching file revisions are found
        - filelinkrevs: a { file-path -> [changelog-rev] } mapping

        For each file this runs the storage's own `verifyintegrity` checks,
        all of `_checkentry` checks for each revision, checks copy sources,
        and reports nodes listed in the manifests but absent from the file
        storage.

        Return a (number of files, number of file revisions) tuple.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        # store files under data/; whatever is left at the end is orphaned
        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(
                            _(b" warning: revlog '%s' not in fncache!") % ff
                        )
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            # NOTE(review): `lr` is an integer or None here
                            # and is formatted with %s into bytes; on
                            # Python 3 that presumably raises TypeError,
                            # which the handler below would report as a
                            # rename-check error -- TODO confirm.
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lr, rp[0], short(rp[1]))
                            )
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                # nodes still listed were never found in the file storage
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now