##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48159:14e76fd8 default draft
parent child Browse files
Show More
@@ -1,590 +1,586 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
# Verification levels accepted by ``verify()`` / ``verifier``.
# VERIFY_DEFAULT is the level ``verifier.__init__`` falls back to when the
# caller passes ``level=None``.
VERIFY_DEFAULT = 0
# At VERIFY_FULL (or any higher level) ``verifier._verifymanifest`` also reads
# the full text of every manifest revision instead of only its delta.
VERIFY_FULL = 1
25 25
26 26
def verify(repo, level=None):
    """Run a full integrity check of ``repo`` while holding its lock.

    A ``verifier`` is built for ``repo`` at the requested ``level`` and the
    value returned by its ``verify()`` method is handed back to the caller.
    """
    with repo.lock():
        return verifier(repo, level).verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
# Printed by ``verifier.verify()`` in its final report when
# ``fncachewarned`` was set, i.e. after a "revlog ... not in fncache"
# warning was issued by ``_verifyfiles``.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

# Used by ``verifier._verifymanifest`` for sub-directory manifests. Despite
# the WARN_ prefix it is reported through ``_err`` and counts as an error.
# The placeholder receives the short form of the manifest node.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)

# Reported through ``verifier._warn`` (which appends the newline) when a
# rename source is not found in any parent of the changeset; formatted with
# (filename, changectx).
WARN_UNKNOWN_COPY_SOURCE = _(
    b"warning: copy source of '%s' not in parents of %s"
)

# Emitted with ``ui.note`` directly, hence the explicit trailing newline;
# formatted with (filename, linkrev, copy-source path, short copy-source node).
WARN_NULLID_COPY_SOURCE = _(
    b"warning: %s@%s: copy source revision is nullid %s:%s\n"
)
56 56
57 57
class verifier(object):
    """Stateful helper running all integrity checks on one repository.

    Errors and warnings are reported through the repository ``ui`` as they
    are found and tallied in ``self.errors`` / ``self.warnings``.
    """

    def __init__(self, repo, level=None):
        """Prepare verification of ``repo``.

        - repo: the repository to check (its unfiltered view is used),
        - level: VERIFY_DEFAULT or VERIFY_FULL (None means VERIFY_DEFAULT).
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        # changelog revisions associated with at least one error
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        # set once a changeset refers to a non-null manifest (or cannot be
        # read), see `_verifychangelog`
        self.refersmf = False
        # set once a revlog is found missing from the fncache
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record an "error" level issue

        When `linkrev` is not None it is remembered in `self.badrevs`; the
        message is prefixed with the linkrev (or `?`) and, when provided,
        with `filename`.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            # fall back to the repr when the exception has no message
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        # the revlog format must agree with the one used by the changelog
        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        `seen` is updated with `node -> i`.

        Return the linkrev of the revision, or None when the stored linkrev
        failed the checks above and cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs. The new
                        # list is fully built before the name is rebound, so
                        # a failure keeps the unfiltered candidates.
                        linkrevs = [
                            clrev
                            for clrev in linkrevs
                            if self.lrugetctx(clrev)[f].filenode() == node
                        ]
                    except Exception:
                        # best effort only: the hint below then lists every
                        # candidate changeset
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            msg = _(b"duplicate revision %d (%d)") % (i, seen[node])
            self._err(lr, msg, f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method runs all verifications, displaying issues as they are
        found.

        return 1 if any error has been encountered, 0 otherwise.

        Raises error.Abort when the repository url is not a `file:` url."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            version = 1 if self.revlogv1 else 0
            ui.status(_(b"repository uses revlog format %d\n") % version)

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(HINT_FNCACHE)
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                msg = _(b"(first damaged changeset appears to be %d)\n")
                msg %= min(self.badrevs)
                ui.warn(msg)
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                # changes[0] is the manifest node, changes[3] the file list
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may still refer to a manifest
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
                          (entries are deleted from it as they are found)
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node referred in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            # the revlog files of this directory are accounted for
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                msg = _(b"%s not in parent-directory manifest") % short(n)
                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        # sub-directory entry (tree manifest)
                        if not match.visitdir(fullpath):
                            continue
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfl.get(dir, n).read()
                except Exception as inst:
                    msg = _(b"reading full manifest %s") % short(n)
                    self._exc(lr, msg, inst, label)

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
                else:
                    msg = _(b"changeset refers to unknown revision %s")
                    msg %= short(m)
                    self._err(c, msg, label)

        if not dir and subdirnodes:
            ui.status(_(b"checking directory manifests\n"))
            # collect the store files under `meta/`; the recursive calls
            # below remove the ones that belong to a known directory
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """check that changesets and manifests mention the same files

        - filelinkrevs: the { file-path -> [changelog-rev] } mapping built
                        by `_verifychangelog`
        - filenodes:    the { path -> { filenode -> changelog-rev } } mapping
                        built by `_verifymanifest`

        An error is recorded for each path present in only one of them.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[f])
                    except Exception:
                        # the filelog cannot provide a usable linkrev
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """verify the filelogs of the repository

        - filenodes:    the { path -> { filenode -> changelog-rev } } mapping
                        built by `_verifymanifest` (entries are deleted from
                        it as the matching revisions are found)
        - filelinkrevs: the { file-path -> [changelog-rev] } mapping built
                        by `_verifychangelog`

        For each file this runs the storage `verifyintegrity` checks, all of
        `_checkentry` checks for each revision, checks rename metadata and
        reports the nodes the manifests refer to but the filelog lacks.

        return a (number of files, number of file revisions) tuple.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        # store files under `data/`; whatever remains after the main loop
        # is reported as orphan
        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        msg = _(b" warning: revlog '%s' not in fncache!")
                        self._warn(msg % ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        linkrev_msg = linkrev if linkrev is not None else lr
                        self._err(linkrev_msg, problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self._checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            m = _(b"empty or missing copy source revlog %s:%s")
                            self._err(lr, m % (rp[0], short(rp[1])), f)
                        elif rp[1] == self.repo.nullid:
                            msg = WARN_NULLID_COPY_SOURCE
                            msg %= (f, lr, rp[0], short(rp[1]))
                            ui.note(msg)
                        else:
                            # raises when the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                # whatever is left was never found in the filelog
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    msg = _(b"manifest refers to unknown revision %s")
                    self._err(lr, msg % short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now