##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48154:f39df554 default
parent child Browse files
Show More
@@ -1,603 +1,602 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
# Verification levels understood by ``verifier``.
# VERIFY_DEFAULT is the level used when the caller passes ``level=None``.
VERIFY_DEFAULT = 0
# At VERIFY_FULL (or above) every manifest full text is additionally read
# back from storage while checking manifests.
VERIFY_FULL = 1
25 25
26 26
def verify(repo, level=None):
    """Check the integrity of ``repo`` while holding its lock.

    ``level`` is forwarded untouched to ``verifier``. The value returned
    is whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        return verifier(repo, level).verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
# Hint printed at the end of ``verifier.verify()`` when a revlog was found
# to be missing from the fncache during the file checks.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

# Error template used while checking a sub-directory manifest: a node that
# the parent-directory manifest references was not found in that manifest.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)
48 48
49 49
class verifier(object):
    """Run the repository integrity checks and keep track of the results.

    Problems are reported through ``ui.warn`` as they are found and are
    counted in ``self.warnings`` / ``self.errors``. ``self.badrevs``
    collects the changelog revisions associated with reported errors.
    """

    def __init__(self, repo, level=None):
        """Prepare the verification of ``repo``.

        ``level`` is one of the ``VERIFY_*`` constants; ``None`` selects
        ``VERIFY_DEFAULT``.
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        # changelog revisions for which an error has been reported
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        # set once a changeset refers to a non-null manifest (or cannot be
        # read at all)
        self.refersmf = False
        # set once a revlog missing from the fncache has been reported
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue

        ``linkrev`` is the changelog revision to blame, or None when it is
        unknown (it is then displayed as "?"). When ``filename`` is given
        it is prepended to the message.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            # fall back to the repr when the exception has no message
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        # every revlog is expected to use the same format family as the
        # changelog
        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _reallinkrevs(self, linkrevs, f, node):
        """filter ``linkrevs`` down to the changesets really holding ``node``

        Only the changesets whose file ``f`` has ``node`` as filenode are
        kept. This is a best effort: if any lookup fails, the unfiltered
        ``linkrevs`` is returned.
        """
        try:
            return [
                rev
                for rev in linkrevs
                if self.lrugetctx(rev)[f].filenode() == node
            ]
        except Exception:
            # deliberately broad: this only refines a diagnostic message
            return linkrevs

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    # attempt to filter down to real linkrevs
                    linkrevs = self._reallinkrevs(linkrevs, f, node)
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method run all verifications, displaying issues as they are found.

        return 1 if any error have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            formatversion = 1 if self.revlogv1 else 0
            ui.status(
                _(b"repository uses revlog format %d\n") % formatversion
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(HINT_FNCACHE)
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                msg = _(b"(first damaged changeset appears to be %d)\n")
                msg %= min(self.badrevs)
                ui.warn(msg)
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                # changes[0] is used as the manifest node and changes[3]
                # as the list of touched files
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may refer to a manifest, so
                # manifests still have to be checked
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node refered in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.

        Note that entries are removed from ``mflinkrevs`` as the matching
        manifest revisions are found.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            # files backing this directory manifest are not orphans
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                msg = _(b"%s not in parent-directory manifest") % short(n)
                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        # sub-directory entry: to be verified recursively
                        if not match.visitdir(fullpath):
                            continue
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    # The result itself is not needed.
                    mfl.get(dir, n).read()
                except Exception as inst:
                    msg = _(b"reading full manifest %s") % short(n)
                    self._exc(lr, msg, inst, label)

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
                else:
                    msg = _(b"changeset refers to unknown revision %s")
                    msg %= short(m)
                    self._err(c, msg, label)

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            # every 'meta/' store file starts as a potential orphan; the
            # recursive calls below discard the ones they account for
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """cross-check the files seen in changesets and in manifests

        - filelinkrevs: a { file-path -> [changelog-rev] } mapping
        - filenodes:    a { file-path -> { filenode -> linkrev } } mapping

        An error is recorded for each file present in only one of the two
        mappings.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # best effort: report without a linkrev
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """verify the filelogs of the repository

        - filenodes:    a { file-path -> { filenode -> linkrev } } mapping
        - filelinkrevs: a { file-path -> [changelog-rev] } mapping

        For every file found in either mapping, the storage integrity, each
        revision entry (see `_checkentry`) and the copy metadata are
        checked. Filenodes found in storage are removed from ``filenodes``;
        the remaining ones are reported as unknown revisions.

        Return a (number-of-files, number-of-file-revisions) tuple.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        # every 'data/' store file starts as a potential orphan
        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        msg = _(b" warning: revlog '%s' not in fncache!")
                        self._warn(msg % ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self._checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            # `lr` is an int or None: render it to bytes
                            # first, bytes %s rejects both on Python 3
                            lrbytes = b'?' if lr is None else b"%d" % lr
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lrbytes, rp[0], short(rp[1]))
                            )
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                # a linkrev may be None (untrusted): order those first
                # rather than comparing None with integers
                fns.sort(key=lambda p: (p[0] is not None, p[0] or 0, p[1]))
                for lr, node in fns:
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now