##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48149:0693dc0b default
parent child Browse files
Show More
@@ -1,618 +1,615
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
# Verification levels accepted by verify() and verifier().
# VERIFY_DEFAULT is what verifier uses when no level is given; at
# VERIFY_FULL (or above) the manifest check additionally reads the full text
# of every manifest revision.
VERIFY_DEFAULT = 0
VERIFY_FULL = 1
25 25
26 26
def verify(repo, level=None):
    """check the integrity of ``repo`` while holding the repository lock

    ``level`` is handed to ``verifier`` untouched. The return value is
    whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        return verifier(repo, level).verify()
31 31
32 32
def _normpath(f):
    """return ``f`` with every run of slashes collapsed to a single slash

    under hg < 2.4, convert didn't sanitize paths properly, so a converted
    repo may contain repeated slashes.
    """
    while True:
        if b'//' not in f:
            return f
        # one pass halves each run of slashes; loop until none is left
        f = f.replace(b'//', b'/')
39 39
40 40
# Hint printed in the final report of verifier.verify() when at least one
# "revlog not in fncache" warning was recorded during the run.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)
44 44
45 45
46 46 class verifier(object):
def __init__(self, repo, level=None):
    """set up the state shared by all verification steps

    - repo: the repository to check (its unfiltered view is what is kept),
    - level: VERIFY_DEFAULT or VERIFY_FULL; None selects VERIFY_DEFAULT.
    """
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    self._level = VERIFY_DEFAULT if level is None else level

    # issue accounting, fed by `_warn` and `_err`
    self.badrevs = set()
    self.errors = 0
    self.warnings = 0

    # what the repository contains
    changesetcount = len(repo.changelog)
    self.havecl = changesetcount > 0
    rootmanifest = repo.manifestlog.getstorage(b'')
    self.havemf = len(rootmanifest) > 0
    clversion = repo.changelog._format_version
    self.revlogv1 = clversion != revlog.REVLOGV0

    # cached changectx lookup on the unfiltered repository
    getctx = repo.unfiltered().__getitem__
    self.lrugetctx = util.lrucachefunc(getctx)

    # flags updated while verification is running
    self.refersmf = False
    self.fncachewarned = False

    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
    self.warnorphanstorefiles = True
66 66
def _warn(self, msg):
    """report ``msg`` as a "warning" level issue and count it

    A newline is appended to ``msg`` before it is sent to ``ui.warn``.
    """
    line = msg + b"\n"
    self.ui.warn(line)
    self.warnings = self.warnings + 1
71 71
def _err(self, linkrev, msg, filename=None):
    """report ``msg`` as an "error" level issue and count it

    - linkrev: changelog revision to blame; it is remembered in
      ``self.badrevs``. None is rendered as "?".
    - msg: the error text (bytes),
    - filename: optional label put in front of the message as "label@".
    """
    if linkrev is None:
        where = b'?'
    else:
        self.badrevs.add(linkrev)
        where = b"%d" % linkrev
    line = b"%s: %s" % (where, msg)
    if filename:
        line = b"%s@%s" % (filename, line)
    self.ui.warn(b" " + line + b"\n")
    self.errors = self.errors + 1
84 84
def _exc(self, linkrev, msg, inst, filename=None):
    """report an exception raised during verification as an error

    The exception ``inst`` is turned into bytes and appended to ``msg``;
    when that text is empty its byte repr is used instead. ``linkrev`` and
    ``filename`` are forwarded to `_err`.
    """
    details = stringutil.forcebytestr(inst) or pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, details), filename)
91 91
def _checkrevlog(self, obj, name, linkrev):
    """run the revlog-wide sanity checks on ``obj``

    - an empty revlog is an error when the repository has a changelog or a
      manifest; no further check is run in that case,
    - data and index sizes reported by ``checksize()`` must both be zero,
    - a format version that disagrees with the changelog's is a warning.

    ``name`` labels the messages; ``linkrev`` is blamed for an empty revlog.
    """
    if len(obj) == 0 and (self.havecl or self.havemf):
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    sizes = obj.checksize()
    dataoff = sizes[0]
    if dataoff:
        self._err(None, _(b"data length off by %d bytes") % dataoff, name)
    indexextra = sizes[1]
    if indexextra:
        msg = _(b"index contains %d extra bytes") % indexextra
        self._err(None, msg, name)

    # only a mismatch with the changelog's format is worth a warning
    if obj._format_version == revlog.REVLOGV0:
        if self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)
    elif not self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 1") % name)
115 115
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """verify a single revlog entry

    arguments are:
    - obj:      the source revlog
    - i:        the revision number
    - node:     the revision node id
    - seen:     {node -> revision number} of entries already checked for
                this revlog; updated with this entry
    - linkrevs: [changelog-revisions] introducing "node"
    - f:        string label ("changelog", "manifest", or filename)

    Performs the following checks:
    - linkrev points to an existing changelog revision,
    - linkrev is one of the expected changelog revisions,
    - both parents are either null or already seen in this revlog,
    - the revision is not duplicated.

    Return the stored linkrev, or None when it failed the checks above and
    therefore cannot be trusted.
    """
    lr = obj.linkrev(obj.rev(node))
    if lr < 0 or (self.havecl and lr not in linkrevs):
        if lr < 0 or lr >= len(self.repo.changelog):
            msg = _(b"rev %d points to nonexistent changeset %d")
        else:
            msg = _(b"rev %d points to unexpected changeset %d")
        self._err(None, msg % (i, lr), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs. The result
                    # is built in a separate list: rebinding `linkrevs`
                    # before iterating over it would leave nothing to
                    # filter and always produce an empty hint.
                    filtered = [
                        rev
                        for rev in linkrevs
                        if self.lrugetctx(rev)[f].filenode() == node
                    ]
                except Exception:
                    # best effort only: keep the unfiltered candidates
                    pass
                else:
                    linkrevs = filtered
            msg = _(b" (expected %s)")
            msg %= b" ".join(map(pycompat.bytestr, linkrevs))
            self._warn(msg)
        lr = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 not in seen and p1 != self.repo.nullid:
            msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
            self._err(lr, msg, f)
        if p2 not in seen and p2 != self.repo.nullid:
            msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
            self._err(lr, msg, f)
    except Exception as inst:
        self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

    if node in seen:
        self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
    seen[node] = i
    return lr
173 173
def verify(self):
    """run every verification step and print the final report

    Issues are displayed as they are found. Only local ("file:")
    repositories can be verified; anything else aborts.

    return 1 if any error has been recorded, 0 otherwise."""
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui
    if not repo.url().startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    journal = repo.sjoin(b"journal")
    if os.path.exists(journal):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        version = 1 if self.revlogv1 else 0
        ui.status(_(b"repository uses revlog format %d\n") % version)

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # final report
    summary = _(b"checked %d changesets with %d changes to %d files\n")
    summary %= (len(repo.changelog), filerevisions, totalfiles)
    ui.status(summary)
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(HINT_FNCACHE)
    if not self.errors:
        return 0

    ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
    if self.badrevs:
        msg = _(b"(first damaged changeset appears to be %d)\n")
        msg %= min(self.badrevs)
        ui.warn(msg)
    return 1
219 219
def _verifychangelog(self):
    """verify the changelog and collect what the changesets refer to

    Checks performed:
    - everything `_checkrevlog` checks, on the changelog,
    - everything `_checkentry` checks, for each revision,
    - each revision can be read.

    Returns a (mflinkrevs, filelinkrevs) pair:
    - mflinkrevs:   { manifest-node -> [changelog-rev] }
    - filelinkrevs: { file-path -> [changelog-rev] }, restricted to the
      paths accepted by ``self.match`` and keyed by normalized path.
    """
    ui = self.ui
    repo = self.repo
    match = self.match
    cl = repo.changelog

    ui.status(_(b"checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self._checkrevlog(cl, b"changelog", 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'changesets'), total=len(repo)
    )
    for rev in repo:
        progress.update(rev)
        node = cl.node(rev)
        self._checkentry(cl, rev, node, seen, [rev], b"changelog")

        try:
            changes = cl.read(node)
            mfnode = changes[0]
            if mfnode != self.repo.nullid:
                mflinkrevs.setdefault(mfnode, []).append(rev)
                self.refersmf = True
            for path in changes[3]:
                if not match(path):
                    continue
                filelinkrevs.setdefault(_normpath(path), []).append(rev)
        except Exception as inst:
            # an unreadable changeset may still refer to a manifest
            self.refersmf = True
            msg = _(b"unpacking changeset %s") % short(node)
            self._exc(rev, msg, inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
267 267
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """verify the manifest log, recursing into directory manifests

    Inputs:
    - mflinkrevs:     {manifest-node -> [changelog-revisions]}; entries
                      are removed as the matching revisions are found
    - dir:            sub-directory to check (tree manifests), empty for
                      the root manifest
    - storefiles:     set of store files not yet claimed by a revlog
    - subdirprogress: progress object shared by the sub-directory calls

    This function checks:
    * everything `_checkrevlog` checks (for each manifest revlog)
    * everything `_checkentry` checks (for each manifest revision)
    * every manifest revision is referenced by `mflinkrevs`
    * every manifest entry has a file path
    * every node in `mflinkrevs` exists in the manifest log

    With tree manifests, only the sub-directories accepted by the matcher
    are visited.

    Returns a two level mapping:
        {"path" -> { filenode -> changelog-revision}}
    holding the matched files found in this manifest and in the
    sub-directory manifests that were visited.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)

    toplevel = not dir
    if toplevel:
        self.ui.status(_(b"checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    if toplevel:
        label = b"manifest"
    else:
        label = dir
        # the files backing this revlog are accounted for
        storefiles.difference_update(mf.files())
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(mf._revlog, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(mf)
    )
    for rev in mf:
        if toplevel:
            progress.update(rev)
        node = mf.node(rev)
        expected = mflinkrevs.get(node, [])
        lr = self._checkentry(mf, rev, node, seen, expected, label)
        if node in mflinkrevs:
            del mflinkrevs[node]
        else:
            if toplevel:
                msg = _(b"%s not in changesets") % short(node)
            else:
                msg = _(b"%s not in parent-directory manifest") % short(node)
            self._err(lr, msg, label)

        try:
            mfdelta = mfl.get(dir, node).readdelta(shallow=True)
            for f, fn, fl in mfdelta.iterentries():
                if not f:
                    self._err(lr, _(b"entry without name in manifest"))
                elif f == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(f)
                if fl == b't':
                    # sub-directory entry: remember it for the recursion
                    if match.visitdir(fullpath):
                        bynode = subdirnodes.setdefault(fullpath + b'/', {})
                        bynode.setdefault(fn, []).append(lr)
                elif match(fullpath):
                    filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
        except Exception as inst:
            msg = _(b"reading delta %s") % short(node)
            self._exc(lr, msg, inst, label)
        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                mfl.get(dir, node).read()
            except Exception as inst:
                msg = _(b"reading full manifest %s") % short(node)
                self._exc(lr, msg, inst, label)

    if toplevel:
        progress.complete()

    if self.havemf:
        # entries are deleted from `mflinkrevs` as they are found, so
        # whatever is left is "missing" and must be reported.
        missing = sorted(
            (clrev, mnode)
            for mnode in mflinkrevs
            for clrev in mflinkrevs[mnode]
        )
        for clrev, mnode in missing:
            if toplevel:
                msg = _(b"changeset refers to unknown revision %s")
            else:
                msg = _(
                    b"parent-directory manifest refers to unknown"
                    b" revision %s"
                )
            self._err(clrev, msg % short(mnode), label)

    if toplevel and subdirnodes:
        self.ui.status(_(b"checking directory manifests\n"))
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
                continue
            if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                storefiles.add(_normpath(f))
                subdirs.add(os.path.dirname(f))
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

    for subdir, linkrevs in pycompat.iteritems(subdirnodes):
        subdirfilenodes = self._verifymanifest(
            linkrevs, subdir, storefiles, subdirprogress
        )
        for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
            filenodes.setdefault(f, {}).update(onefilenodes)

    if toplevel and subdirnodes:
        assert subdirprogress is not None  # help pytype
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

    return filenodes
430 427
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """check that changesets and manifests agree on the set of files

    - filelinkrevs: { file-path -> [changelog-rev] } seen in changesets
    - filenodes:    { file-path -> { filenode -> changelog-rev } } seen in
                    manifests

    A path present on one side only is reported as an error. Each
    direction is skipped when the repository lacks the matching data
    (no manifest, respectively no changelog).
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    total = len(filelinkrevs) + len(filenodes)
    progress = ui.makeprogress(
        _(b'crosschecking'), unit=_(b'files'), total=total
    )
    if self.havemf:
        for path in sorted(filelinkrevs):
            progress.increment()
            if path in filenodes:
                continue
            firstrev = filelinkrevs[path][0]
            self._err(firstrev, _(b"in changeset but not in manifest"), path)

    if self.havecl:
        for path in sorted(filenodes):
            progress.increment()
            if path in filelinkrevs:
                continue
            try:
                # blame the earliest linkrev stored for the known nodes
                fl = repo.file(path)
                lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[path])
            except Exception:
                lr = None
            self._err(lr, _(b"in manifest but not in changeset"), path)

    progress.complete()
459 456
def _verifyfiles(self, filenodes, filelinkrevs):
    """verify every file revlog referenced by manifests or changesets

    - filenodes:    { file-path -> { filenode -> changelog-rev } } built
                    from the manifests; entries are deleted from the inner
                    mappings as the matching revisions are found
    - filelinkrevs: { file-path -> [changelog-rev] } built from changesets

    For each file this checks that its revlog can be opened and is listed
    among the store data files, runs the storage's own `verifyintegrity`,
    runs `_checkentry` on every revision, checks copy/rename metadata and
    reports manifest nodes that were not found in the revlog. Store data
    files claimed by no revlog are reported as orphans at the end.

    Returns a (number of files, number of file revisions) pair.
    """
    repo = self.repo
    ui = self.ui
    lrugetctx = self.lrugetctx
    revlogv1 = self.revlogv1
    havemf = self.havemf
    ui.status(_(b"checking files\n"))

    # every data file of the store; entries are removed as revlogs claim
    # them, what is left at the end is orphaned
    storefiles = set()
    for rl_type, f, f2, size in repo.store.datafiles():
        if not f:
            self._err(None, _(b"cannot decode filename '%s'") % f2)
        elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
            storefiles.add(_normpath(f))

    state = {
        # TODO this assumes revlog storage for changelog.
        b'expectedversion': self.repo.changelog._format_version,
        b'skipflags': self.skipflags,
        # experimental config: censor.policy
        b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
    }

    files = sorted(set(filenodes) | set(filelinkrevs))
    revisions = 0
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'files'), total=len(files)
    )
    for i, f in enumerate(files):
        progress.update(i, item=f)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.StorageError as e:
            self._err(lr, _(b"broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                if self.warnorphanstorefiles:
                    self._warn(
                        _(b" warning: revlog '%s' not in fncache!") % ff
                    )
                    self.fncachewarned = True

        if not len(fl) and (self.havecl or self.havemf):
            self._err(lr, _(b"empty or missing %s") % f)
        else:
            # Guard against implementations not setting this.
            state[b'skipread'] = set()
            state[b'safe_renamed'] = set()

            for problem in fl.verifyintegrity(state):
                if problem.node is not None:
                    linkrev = fl.linkrev(fl.rev(problem.node))
                else:
                    linkrev = None

                if problem.warning:
                    self._warn(problem.warning)
                elif problem.error:
                    self._err(
                        linkrev if linkrev is not None else lr,
                        problem.error,
                        f,
                    )
                else:
                    raise error.ProgrammingError(
                        b'problem instance does not set warning or error '
                        b'attribute: %s' % problem.msg
                    )

        seen = {}
        for i in fl:
            revisions += 1
            n = fl.node(i)
            lr = self._checkentry(fl, i, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                else:
                    del filenodes[f][n]

            if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                continue

            # check renames
            try:
                # This requires resolving fulltext (at least on revlogs,
                # though not with LFS revisions). We may want
                # ``verifyintegrity()`` to pass a set of nodes with
                # rename metadata as an optimization.
                rp = fl.renamed(n)
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        if not any(rp[0] in pctx for pctx in ctx.parents()):
                            self._warn(
                                _(
                                    b"warning: copy source of '%s' not"
                                    b" in parents of %s"
                                )
                                % (f, ctx)
                            )
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        self._err(
                            lr,
                            _(
                                b"empty or missing copy source revlog "
                                b"%s:%s"
                            )
                            % (rp[0], short(rp[1])),
                            f,
                        )
                    elif rp[1] == self.repo.nullid:
                        # `lr` is an int or None. A bytes "%s" accepts
                        # neither on Python 3, which made this note raise
                        # and be reported as a bogus rename error, so it
                        # is rendered first, the same way `_err` does.
                        if lr is None:
                            lrtext = b'?'
                        else:
                            lrtext = b"%d" % lr
                        ui.note(
                            _(
                                b"warning: %s@%s: copy source"
                                b" revision is nullid %s:%s\n"
                            )
                            % (f, lrtext, rp[0], short(rp[1]))
                        )
                    else:
                        # only checks that the source revision exists
                        fl2.rev(rp[1])
            except Exception as inst:
                self._exc(
                    lr, _(b"checking rename of %s") % short(n), inst, f
                )

        # cross-check: nodes still listed were never found in the revlog
        if f in filenodes:
            fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
            for lr, node in sorted(fns):
                self._err(
                    lr,
                    _(b"manifest refers to unknown revision %s")
                    % short(node),
                    f,
                )
    progress.complete()

    if self.warnorphanstorefiles:
        for f in sorted(storefiles):
            self._warn(_(b"warning: orphan data file '%s'") % f)

    return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now