##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48153:80c690bf default
parent child Browse files
Show More
@@ -1,606 +1,603 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
# Verification levels accepted by ``verify()`` / ``verifier``.
# VERIFY_DEFAULT is the level used when the caller passes ``level=None``.
VERIFY_DEFAULT = 0
# At VERIFY_FULL (or above) every manifest revision is additionally read in
# full from storage, not only as a delta.
VERIFY_FULL = 1
25 25
26 26
def verify(repo, level=None):
    """Check the integrity of ``repo`` while holding its lock.

    ``level`` is forwarded unchanged to ``verifier``; ``None`` lets the
    verifier pick its default level.

    Returns whatever ``verifier.verify()`` returns (1 if any error was
    encountered, 0 otherwise).
    """
    with repo.lock():
        v = verifier(repo, level)
        return v.verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
# Hint printed at the end of a verify run when a revlog was found on disk
# but was missing from the fncache.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

# Error template used when a sub-directory manifest node referenced by its
# parent directory is not present in that sub-directory's manifest log.
# The single %s is filled with the short form of the missing node.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)
48 48
49 49
class verifier(object):
    """Run the integrity checks of a repository and tally what they find.

    Problems are reported through ``ui.warn`` as they are discovered and
    counted in ``self.errors`` / ``self.warnings``; changelog revisions
    associated with an error are collected in ``self.badrevs``.
    """

    def __init__(self, repo, level=None):
        """Prepare a verification run of ``repo`` at the given ``level``.

        ``level`` defaults to ``VERIFY_DEFAULT`` when ``None``.
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        # set to True once a changeset referencing a non-null manifest (or
        # an unreadable changeset) has been seen
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue"""
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        # warn when this revlog's format differs from the changelog's
        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        ``seen`` is updated in place with ``node -> i``; the caller's
        ``linkrevs`` list is never mutated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        #
                        # The candidates are filtered into a *new* list: the
                        # name is only rebound once the whole filter
                        # succeeded, so a failing lookup keeps the unfiltered
                        # candidates for the message below.
                        linkrevs = [
                            rev
                            for rev in linkrevs
                            if self.lrugetctx(rev)[f].filenode() == node
                        ]
                    except Exception:
                        # best effort only: fall back to the full list
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method run all verifications, displaying issues as they are found.

        return 1 if any error have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (1 if self.revlogv1 else 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(HINT_FNCACHE)
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                msg = _(b"(first damaged changeset appears to be %d)\n")
                msg %= min(self.badrevs)
                ui.warn(msg)
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                # changes[0] is used as the manifest node, changes[3] as the
                # list of files touched by the changeset
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node refered in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.

        Note: entries of ``mflinkrevs`` are deleted as the matching manifest
        revisions are found.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                msg = _(b"%s not in parent-directory manifest") % short(n)
                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        # entry flagged b't': recorded as a sub-directory
                        # manifest and verified by the recursive call below
                        if not match.visitdir(fullpath):
                            continue
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfl.get(dir, n).read()
                except Exception as inst:
                    msg = _(b"reading full manifest %s") % short(n)
                    self._exc(lr, msg, inst, label)

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
                else:
                    msg = _(b"changeset refers to unknown revision %s")
                    msg %= short(m)
                    self._err(c, msg, label)

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                # whatever no sub-directory manifest claimed is an orphan
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """report files known to only one of the changelog and the manifests

        - filelinkrevs: a { file-path -> [changelog-rev] } mapping
        - filenodes:    a { file-path -> { filenode -> changelog-rev } } mapping

        A file present in ``filelinkrevs`` but not in ``filenodes`` is an
        error (only checked when manifests exist); the reverse is an error
        too (only checked when changesets exist).
        """
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # the filelog could not be consulted: report the
                        # error without a linkrev
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """verify every filelog referenced by the changelog or the manifests

        - filenodes:    a { file-path -> { filenode -> changelog-rev } } mapping
        - filelinkrevs: a { file-path -> [changelog-rev] } mapping

        For each file this runs the storage's own ``verifyintegrity`` checks,
        `_checkentry` on every revision, a check of the rename metadata and a
        cross-check against the nodes listed in ``filenodes``. Entries of
        ``filenodes`` are deleted as the matching revisions are found.

        return a (number-of-files, number-of-file-revisions) tuple.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            # every store file claimed by a filelog is not an orphan
            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(
                            _(b" warning: revlog '%s' not in fncache!") % ff
                        )
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lr, rp[0], short(rp[1]))
                            )
                        else:
                            # raises (and is reported below) when the copy
                            # source revision cannot be looked up
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                # nodes still listed were never found in the filelog
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now