##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48151:fb438539 default
parent child Browse files
Show More
@@ -1,611 +1,610 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
# Verification levels accepted by ``verify()`` and ``verifier``.
# VERIFY_DEFAULT is used when no level is given.
VERIFY_DEFAULT = 0
# At VERIFY_FULL (or above) every manifest full text is also read back.
VERIFY_FULL = 1
25 25
26 26
def verify(repo, level=None):
    """Check the integrity of ``repo`` while holding the repository lock.

    ``level`` is handed to ``verifier`` unchanged. The value returned is
    whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        return verifier(repo, level).verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
# Hint printed at the end of a verify run that found a revlog file missing
# from the set of files reported by the store.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" '
    b'to recover from corrupt fncache\n'
)
44 44
45 45
class verifier(object):
    """Repository integrity checker.

    An instance accumulates the number of errors and warnings found, as
    well as the set of changelog revisions known to be damaged, while the
    individual checks run. ``verify()`` is the entry point that runs all
    checks and prints the final report.
    """

    def __init__(self, repo, level=None):
        """set up the checker for ``repo``

        ``level`` is one of the ``VERIFY_*`` constants; ``None`` selects
        ``VERIFY_DEFAULT``.
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        # changelog revisions associated with at least one error
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        # set once a changeset referencing a non-null manifest is seen (or
        # a changeset could not be read)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue

        ``linkrev`` is the changelog revision the issue is attached to, or
        None when it is unknown (displayed as "?").
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            # the exception carries no message, fall back to its repr
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        # the format version must agree with the changelog's one
        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        #
                        # The new list is built before the name is rebound,
                        # so the candidates are actually iterated, and the
                        # original list is kept if a lookup raises.
                        linkrevs = [
                            candidate
                            for candidate in linkrevs
                            if self.lrugetctx(candidate)[f].filenode() == node
                        ]
                    except Exception:
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method run all verifications, displaying issues as they are found.

        return 1 if any error have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (self.revlogv1 and 1 or 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(HINT_FNCACHE)
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                msg = _(b"(first damaged changeset appears to be %d)\n")
                msg %= min(self.badrevs)
                ui.warn(msg)
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                # changes[0] is used as the manifest node and changes[3] as
                # the list of touched files
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node referred in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.

        Note that entries are removed from `mflinkrevs` as the matching
        manifest revisions are found.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                msg = _(b"%s not in parent-directory manifest") % short(n)
                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        # sub-directory entry (tree manifest)
                        if not match.visitdir(fullpath):
                            continue
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    msg = _(b"reading full manifest %s") % short(n)
                    self._exc(lr, msg, inst, label)

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(
                        c,
                        _(
                            b"parent-directory manifest refers to unknown"
                            b" revision %s"
                        )
                        % short(m),
                        label,
                    )
                else:
                    self._err(
                        c,
                        _(b"changeset refers to unknown revision %s")
                        % short(m),
                        label,
                    )

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        # recurse in each sub-directory manifest and merge its results
        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                # whatever no sub-directory manifest claimed is orphan
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """check that changesets and manifests mention the same files

        - filelinkrevs: the { file-path -> [changelog-rev] } mapping
        - filenodes:    the {"path" -> { filenode -> changelog-revision}}
                        mapping

        An error is recorded for each file present in only one of the two
        mappings. Each direction is only checked when the corresponding
        storage (manifest, changelog) is non-empty.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # best effort only: report without a linkrev
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """verify the file storage of every known file

        - filenodes:    the {"path" -> { filenode -> changelog-revision}}
                        mapping
        - filelinkrevs: the { file-path -> [changelog-rev] } mapping

        For each file this runs the storage `verifyintegrity` checks, all of
        `_checkentry` checks (for each revision), and checks copy sources.
        File nodes found in storage are removed from `filenodes`; the ones
        remaining are reported as unknown revisions.

        return a (number of files, number of file revisions) tuple.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(
                            _(b" warning: revlog '%s' not in fncache!") % ff
                        )
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            # `lr` is an integer or None; bytes formatting
                            # only accepts bytes-like objects for %s, so
                            # render it the same way `_err` does.
                            if lr is not None:
                                lrlabel = b"%d" % lr
                            else:
                                lrlabel = b'?'
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lrlabel, rp[0], short(rp[1]))
                            )
                        else:
                            # raises (and gets reported below) if the copy
                            # source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now