##// END OF EJS Templates
verify: align a comment line...
marmoute -
r48142:5ed2aaab default
parent child Browse files
Show More
@@ -1,627 +1,627 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
23 23 VERIFY_DEFAULT = 0
24 24 VERIFY_FULL = 1
25 25
26 26
def verify(repo, level=None):
    """Verify ``repo`` while holding its lock.

    A ``verifier`` is built for ``repo`` at the requested ``level`` and the
    result of its ``verify()`` method is returned unchanged.
    """
    with repo.lock():
        return verifier(repo, level).verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
class verifier(object):
    """Stateful helper running the integrity checks on one repository."""

    def __init__(self, repo, level=None):
        """Capture the repository and reset every counter.

        ``level`` falls back to ``VERIFY_DEFAULT`` when it is None.
        """
        # handles on the repository being checked
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        self._level = VERIFY_DEFAULT if level is None else level

        # issue bookkeeping, updated by _warn() and _err()
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0

        # what the repository contains and which revlog format it uses
        self.havecl = len(repo.changelog) != 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) != 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)

        # flags flipped while the verification runs
        self.refersmf = False
        self.fncachewarned = False

        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True
61 61
62 62 def _warn(self, msg):
63 63 """record a "warning" level issue"""
64 64 self.ui.warn(msg + b"\n")
65 65 self.warnings += 1
66 66
67 67 def _err(self, linkrev, msg, filename=None):
68 68 """record a "error" level issue"""
69 69 if linkrev is not None:
70 70 self.badrevs.add(linkrev)
71 71 linkrev = b"%d" % linkrev
72 72 else:
73 73 linkrev = b'?'
74 74 msg = b"%s: %s" % (linkrev, msg)
75 75 if filename:
76 76 msg = b"%s@%s" % (filename, msg)
77 77 self.ui.warn(b" " + msg + b"\n")
78 78 self.errors += 1
79 79
80 80 def _exc(self, linkrev, msg, inst, filename=None):
81 81 """record exception raised during the verify process"""
82 82 fmsg = stringutil.forcebytestr(inst)
83 83 if not fmsg:
84 84 fmsg = pycompat.byterepr(inst)
85 85 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
86 86
87 87 def _checkrevlog(self, obj, name, linkrev):
88 88 """verify high level property of a revlog
89 89
90 90 - revlog is present,
91 91 - revlog is non-empty,
92 92 - sizes (index and data) are correct,
93 93 - revlog's format version is correct.
94 94 """
95 95 if not len(obj) and (self.havecl or self.havemf):
96 96 self._err(linkrev, _(b"empty or missing %s") % name)
97 97 return
98 98
99 99 d = obj.checksize()
100 100 if d[0]:
101 101 self._err(None, _(b"data length off by %d bytes") % d[0], name)
102 102 if d[1]:
103 103 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
104 104
105 105 if obj._format_version != revlog.REVLOGV0:
106 106 if not self.revlogv1:
107 107 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
108 108 elif self.revlogv1:
109 109 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
110 110
    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog (a mapping from
                    node to revision number, updated by this call)
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - both parents are the null id or were already seen in this revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision, or None when the stored linkrev
        failed the checks above and therefore cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        # the "lr not in linkrevs" test is only meaningful with a changelog
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [
                            l
                            for l in linkrevs
                            if self.lrugetctx(l)[f].filenode() == node
                        ]
                    except Exception:
                        # best effort only: keep the unfiltered candidates
                        pass
                self._warn(
                    _(b" (expected %s)")
                    % b" ".join(map(pycompat.bytestr, linkrevs))
                )
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            # a parent is acceptable when it is the null id or was already
            # recorded in `seen`
            if p1 not in seen and p1 != self.repo.nullid:
                self._err(
                    lr,
                    _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
                    f,
                )
            if p2 not in seen and p2 != self.repo.nullid:
                self._err(
                    lr,
                    _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
                    f,
                )
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        # record the node; on a duplicate the latest revision number wins
        seen[node] = i
        return lr
176 176
177 177 def verify(self):
178 178 """verify the content of the Mercurial repository
179 179
180 180 This method run all verifications, displaying issues as they are found.
181 181
182 182 return 1 if any error have been encountered, 0 otherwise."""
183 183 # initial validation and generic report
184 184 repo = self.repo
185 185 ui = repo.ui
186 186 if not repo.url().startswith(b'file:'):
187 187 raise error.Abort(_(b"cannot verify bundle or remote repos"))
188 188
189 189 if os.path.exists(repo.sjoin(b"journal")):
190 190 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
191 191
192 192 if ui.verbose or not self.revlogv1:
193 193 ui.status(
194 194 _(b"repository uses revlog format %d\n")
195 195 % (self.revlogv1 and 1 or 0)
196 196 )
197 197
198 198 # data verification
199 199 mflinkrevs, filelinkrevs = self._verifychangelog()
200 200 filenodes = self._verifymanifest(mflinkrevs)
201 201 del mflinkrevs
202 202 self._crosscheckfiles(filelinkrevs, filenodes)
203 203 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
204 204
205 205 # final report
206 206 ui.status(
207 207 _(b"checked %d changesets with %d changes to %d files\n")
208 208 % (len(repo.changelog), filerevisions, totalfiles)
209 209 )
210 210 if self.warnings:
211 211 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
212 212 if self.fncachewarned:
213 213 ui.warn(
214 214 _(
215 215 b'hint: run "hg debugrebuildfncache" to recover from '
216 216 b'corrupt fncache\n'
217 217 )
218 218 )
219 219 if self.errors:
220 220 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
221 221 if self.badrevs:
222 222 ui.warn(
223 223 _(b"(first damaged changeset appears to be %d)\n")
224 224 % min(self.badrevs)
225 225 )
226 226 return 1
227 227 return 0
228 228
229 229 def _verifychangelog(self):
230 230 """verify the changelog of a repository
231 231
232 232 The following checks are performed:
233 233 - all of `_checkrevlog` checks,
234 234 - all of `_checkentry` checks (for each revisions),
235 235 - each revision can be read.
236 236
237 237 The function returns some of the data observed in the changesets as a
238 238 (mflinkrevs, filelinkrevs) tuples:
239 239 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
240 240 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
241 241
242 242 If a matcher was specified, filelinkrevs will only contains matched
243 243 files.
244 244 """
245 245 ui = self.ui
246 246 repo = self.repo
247 247 match = self.match
248 248 cl = repo.changelog
249 249
250 250 ui.status(_(b"checking changesets\n"))
251 251 mflinkrevs = {}
252 252 filelinkrevs = {}
253 253 seen = {}
254 254 self._checkrevlog(cl, b"changelog", 0)
255 255 progress = ui.makeprogress(
256 256 _(b'checking'), unit=_(b'changesets'), total=len(repo)
257 257 )
258 258 for i in repo:
259 259 progress.update(i)
260 260 n = cl.node(i)
261 261 self._checkentry(cl, i, n, seen, [i], b"changelog")
262 262
263 263 try:
264 264 changes = cl.read(n)
265 265 if changes[0] != self.repo.nullid:
266 266 mflinkrevs.setdefault(changes[0], []).append(i)
267 267 self.refersmf = True
268 268 for f in changes[3]:
269 269 if match(f):
270 270 filelinkrevs.setdefault(_normpath(f), []).append(i)
271 271 except Exception as inst:
272 272 self.refersmf = True
273 273 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
274 274 progress.complete()
275 275 return mflinkrevs, filelinkrevs
276 276
    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping;
                          entries are deleted from it as the matching
                          manifest revisions are found
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files; the files backing
                          the manifest of `dir` are removed from it
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node referred in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        # the banner is only printed by the top-level (non recursive) call
        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            # files backing this directory manifest are not orphans
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            # a manifest revision nobody refers to is an error; referenced
            # ones are removed so that leftovers can be reported below
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(
                    lr,
                    _(b"%s not in parent-directory manifest") % short(n),
                    label,
                )
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    # entries flagged b't' are recorded as sub-directories,
                    # everything else as a file
                    if fl == b't':
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                            fn, []
                        ).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        # setdefault: the first linkrev seen for a filenode
                        # is the one kept
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    self._exc(
                        lr,
                        _(b"reading full manifest %s") % short(n),
                        inst,
                        label,
                    )

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(
                        c,
                        _(
                            b"parent-directory manifest refers to unknown"
                            b" revision %s"
                        )
                        % short(m),
                        label,
                    )
                else:
                    self._err(
                        c,
                        _(b"changeset refers to unknown revision %s")
                        % short(m),
                        label,
                    )

        if not dir and subdirnodes:
            # top-level call with sub-directories: collect the b'meta/' store
            # files once; the recursive calls below remove the ones in use
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        # recurse into each sub-directory and merge its result into ours
        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            # whatever is left in `storefiles` was claimed by no manifest
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes
439 439
440 440 def _crosscheckfiles(self, filelinkrevs, filenodes):
441 441 repo = self.repo
442 442 ui = self.ui
443 443 ui.status(_(b"crosschecking files in changesets and manifests\n"))
444 444
445 445 total = len(filelinkrevs) + len(filenodes)
446 446 progress = ui.makeprogress(
447 447 _(b'crosschecking'), unit=_(b'files'), total=total
448 448 )
449 449 if self.havemf:
450 450 for f in sorted(filelinkrevs):
451 451 progress.increment()
452 452 if f not in filenodes:
453 453 lr = filelinkrevs[f][0]
454 454 self._err(lr, _(b"in changeset but not in manifest"), f)
455 455
456 456 if self.havecl:
457 457 for f in sorted(filenodes):
458 458 progress.increment()
459 459 if f not in filelinkrevs:
460 460 try:
461 461 fl = repo.file(f)
462 462 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
463 463 except Exception:
464 464 lr = None
465 465 self._err(lr, _(b"in manifest but not in changeset"), f)
466 466
467 467 progress.complete()
468 468
469 469 def _verifyfiles(self, filenodes, filelinkrevs):
470 470 repo = self.repo
471 471 ui = self.ui
472 472 lrugetctx = self.lrugetctx
473 473 revlogv1 = self.revlogv1
474 474 havemf = self.havemf
475 475 ui.status(_(b"checking files\n"))
476 476
477 477 storefiles = set()
478 478 for rl_type, f, f2, size in repo.store.datafiles():
479 479 if not f:
480 480 self._err(None, _(b"cannot decode filename '%s'") % f2)
481 481 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
482 482 storefiles.add(_normpath(f))
483 483
484 484 state = {
485 485 # TODO this assumes revlog storage for changelog.
486 486 b'expectedversion': self.repo.changelog._format_version,
487 487 b'skipflags': self.skipflags,
488 488 # experimental config: censor.policy
489 489 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
490 490 }
491 491
492 492 files = sorted(set(filenodes) | set(filelinkrevs))
493 493 revisions = 0
494 494 progress = ui.makeprogress(
495 495 _(b'checking'), unit=_(b'files'), total=len(files)
496 496 )
497 497 for i, f in enumerate(files):
498 498 progress.update(i, item=f)
499 499 try:
500 500 linkrevs = filelinkrevs[f]
501 501 except KeyError:
502 502 # in manifest but not in changelog
503 503 linkrevs = []
504 504
505 505 if linkrevs:
506 506 lr = linkrevs[0]
507 507 else:
508 508 lr = None
509 509
510 510 try:
511 511 fl = repo.file(f)
512 512 except error.StorageError as e:
513 513 self._err(lr, _(b"broken revlog! (%s)") % e, f)
514 514 continue
515 515
516 516 for ff in fl.files():
517 517 try:
518 518 storefiles.remove(ff)
519 519 except KeyError:
520 520 if self.warnorphanstorefiles:
521 521 self._warn(
522 522 _(b" warning: revlog '%s' not in fncache!") % ff
523 523 )
524 524 self.fncachewarned = True
525 525
526 526 if not len(fl) and (self.havecl or self.havemf):
527 527 self._err(lr, _(b"empty or missing %s") % f)
528 528 else:
529 529 # Guard against implementations not setting this.
530 530 state[b'skipread'] = set()
531 531 state[b'safe_renamed'] = set()
532 532
533 533 for problem in fl.verifyintegrity(state):
534 534 if problem.node is not None:
535 535 linkrev = fl.linkrev(fl.rev(problem.node))
536 536 else:
537 537 linkrev = None
538 538
539 539 if problem.warning:
540 540 self._warn(problem.warning)
541 541 elif problem.error:
542 542 self._err(
543 543 linkrev if linkrev is not None else lr,
544 544 problem.error,
545 545 f,
546 546 )
547 547 else:
548 548 raise error.ProgrammingError(
549 549 b'problem instance does not set warning or error '
550 550 b'attribute: %s' % problem.msg
551 551 )
552 552
553 553 seen = {}
554 554 for i in fl:
555 555 revisions += 1
556 556 n = fl.node(i)
557 557 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
558 558 if f in filenodes:
559 559 if havemf and n not in filenodes[f]:
560 560 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
561 561 else:
562 562 del filenodes[f][n]
563 563
564 564 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
565 565 continue
566 566
567 567 # check renames
568 568 try:
569 569 # This requires resolving fulltext (at least on revlogs,
570 570 # though not with LFS revisions). We may want
571 571 # ``verifyintegrity()`` to pass a set of nodes with
572 572 # rename metadata as an optimization.
573 573 rp = fl.renamed(n)
574 574 if rp:
575 575 if lr is not None and ui.verbose:
576 576 ctx = lrugetctx(lr)
577 577 if not any(rp[0] in pctx for pctx in ctx.parents()):
578 578 self._warn(
579 579 _(
580 580 b"warning: copy source of '%s' not"
581 581 b" in parents of %s"
582 582 )
583 583 % (f, ctx)
584 584 )
585 585 fl2 = repo.file(rp[0])
586 586 if not len(fl2):
587 587 self._err(
588 588 lr,
589 589 _(
590 590 b"empty or missing copy source revlog "
591 591 b"%s:%s"
592 592 )
593 593 % (rp[0], short(rp[1])),
594 594 f,
595 595 )
596 596 elif rp[1] == self.repo.nullid:
597 597 ui.note(
598 598 _(
599 599 b"warning: %s@%s: copy source"
600 600 b" revision is nullid %s:%s\n"
601 601 )
602 602 % (f, lr, rp[0], short(rp[1]))
603 603 )
604 604 else:
605 605 fl2.rev(rp[1])
606 606 except Exception as inst:
607 607 self._exc(
608 608 lr, _(b"checking rename of %s") % short(n), inst, f
609 609 )
610 610
611 611 # cross-check
612 612 if f in filenodes:
613 613 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
614 614 for lr, node in sorted(fns):
615 615 self._err(
616 616 lr,
617 617 _(b"manifest refers to unknown revision %s")
618 618 % short(node),
619 619 f,
620 620 )
621 621 progress.complete()
622 622
623 623 if self.warnorphanstorefiles:
624 624 for f in sorted(storefiles):
625 625 self._warn(_(b"warning: orphan data file '%s'") % f)
626 626
627 627 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now