verify: use some intermediate variables instead of a multi-liner...
marmoute
r48158:041d6515 default
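
The change itself is small: the multi-line ui.note(...) call in _verifyfiles (old lines 560-566 in the diff below) is replaced by a module-level message constant plus an intermediate variable (new lines 53-56 and 564-566). A minimal before/after sketch of the pattern, using the names from the diff; ui, _, short, f, lr, and rp are all provided by the surrounding verify.py code:

    # before: the format string is built inline, inside a multi-line call
    ui.note(
        _(
            b"warning: %s@%s: copy source"
            b" revision is nullid %s:%s\n"
        )
        % (f, lr, rp[0], short(rp[1]))
    )

    # after: the message lives in a module-level constant and is formatted
    # into an intermediate variable before being emitted
    WARN_NULLID_COPY_SOURCE = _(
        b"warning: %s@%s: copy source revision is nullid %s:%s\n"
    )

    msg = WARN_NULLID_COPY_SOURCE
    msg %= (f, lr, rp[0], short(rp[1]))
    ui.note(msg)
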
@@ -1,590 +1,590 @@
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
23 23 VERIFY_DEFAULT = 0
24 24 VERIFY_FULL = 1
25 25
26 26
27 27 def verify(repo, level=None):
28 28 with repo.lock():
29 29 v = verifier(repo, level)
30 30 return v.verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
41 41 HINT_FNCACHE = _(
42 42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 43 )
44 44
45 45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 46 b"parent-directory manifest refers to unknown revision %s"
47 47 )
48 48
49 49 WARN_UNKNOWN_COPY_SOURCE = _(
50 50 b"warning: copy source of '%s' not in parents of %s"
51 51 )
52 52
53 WARN_NULLID_COPY_SOURCE = _(
54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
55 )
56
53 57
54 58 class verifier(object):
55 59 def __init__(self, repo, level=None):
56 60 self.repo = repo.unfiltered()
57 61 self.ui = repo.ui
58 62 self.match = repo.narrowmatch()
59 63 if level is None:
60 64 level = VERIFY_DEFAULT
61 65 self._level = level
62 66 self.badrevs = set()
63 67 self.errors = 0
64 68 self.warnings = 0
65 69 self.havecl = len(repo.changelog) > 0
66 70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
67 71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
68 72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
69 73 self.refersmf = False
70 74 self.fncachewarned = False
71 75 # developer config: verify.skipflags
72 76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
73 77 self.warnorphanstorefiles = True
74 78
75 79 def _warn(self, msg):
76 80 """record a "warning" level issue"""
77 81 self.ui.warn(msg + b"\n")
78 82 self.warnings += 1
79 83
80 84 def _err(self, linkrev, msg, filename=None):
81 85 """record a "error" level issue"""
82 86 if linkrev is not None:
83 87 self.badrevs.add(linkrev)
84 88 linkrev = b"%d" % linkrev
85 89 else:
86 90 linkrev = b'?'
87 91 msg = b"%s: %s" % (linkrev, msg)
88 92 if filename:
89 93 msg = b"%s@%s" % (filename, msg)
90 94 self.ui.warn(b" " + msg + b"\n")
91 95 self.errors += 1
92 96
93 97 def _exc(self, linkrev, msg, inst, filename=None):
94 98 """record exception raised during the verify process"""
95 99 fmsg = stringutil.forcebytestr(inst)
96 100 if not fmsg:
97 101 fmsg = pycompat.byterepr(inst)
98 102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
99 103
100 104 def _checkrevlog(self, obj, name, linkrev):
101 105 """verify high level property of a revlog
102 106
103 107 - revlog is present,
104 108 - revlog is non-empty,
105 109 - sizes (index and data) are correct,
106 110 - revlog's format version is correct.
107 111 """
108 112 if not len(obj) and (self.havecl or self.havemf):
109 113 self._err(linkrev, _(b"empty or missing %s") % name)
110 114 return
111 115
112 116 d = obj.checksize()
113 117 if d[0]:
114 118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
115 119 if d[1]:
116 120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
117 121
118 122 if obj._format_version != revlog.REVLOGV0:
119 123 if not self.revlogv1:
120 124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
121 125 elif self.revlogv1:
122 126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
123 127
124 128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
125 129 """verify a single revlog entry
126 130
127 131 arguments are:
128 132 - obj: the source revlog
129 133 - i: the revision number
130 134 - node: the revision node id
131 135 - seen: nodes previously seen for this revlog
132 136 - linkrevs: [changelog-revisions] introducing "node"
133 137 - f: string label ("changelog", "manifest", or filename)
134 138
135 139 Performs the following checks:
136 140 - linkrev points to an existing changelog revision,
137 141 - linkrev points to a changelog revision that introduces this revision,
138 142 - linkrev points to the lowest of these changesets,
139 143 - both parents exist in the revlog,
140 144 - the revision is not duplicated.
141 145
142 146 Return the linkrev of the revision (or None for changelog's revisions).
143 147 """
144 148 lr = obj.linkrev(obj.rev(node))
145 149 if lr < 0 or (self.havecl and lr not in linkrevs):
146 150 if lr < 0 or lr >= len(self.repo.changelog):
147 151 msg = _(b"rev %d points to nonexistent changeset %d")
148 152 else:
149 153 msg = _(b"rev %d points to unexpected changeset %d")
150 154 self._err(None, msg % (i, lr), f)
151 155 if linkrevs:
152 156 if f and len(linkrevs) > 1:
153 157 try:
154 158 # attempt to filter down to real linkrevs
155 159 linkrevs = []
156 160 for lr in linkrevs:
157 161 if self.lrugetctx(lr)[f].filenode() == node:
158 162 linkrevs.append(lr)
159 163 except Exception:
160 164 pass
161 165 msg = _(b" (expected %s)")
162 166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
163 167 self._warn(msg)
164 168 lr = None # can't be trusted
165 169
166 170 try:
167 171 p1, p2 = obj.parents(node)
168 172 if p1 not in seen and p1 != self.repo.nullid:
169 173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
170 174 self._err(lr, msg, f)
171 175 if p2 not in seen and p2 != self.repo.nullid:
172 176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
173 177 self._err(lr, msg, f)
174 178 except Exception as inst:
175 179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
176 180
177 181 if node in seen:
178 182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
179 183 seen[node] = i
180 184 return lr
181 185
182 186 def verify(self):
183 187 """verify the content of the Mercurial repository
184 188
185 189 This method runs all verifications, displaying issues as they are found.
186 190
187 191 return 1 if any errors have been encountered, 0 otherwise."""
188 192 # initial validation and generic report
189 193 repo = self.repo
190 194 ui = repo.ui
191 195 if not repo.url().startswith(b'file:'):
192 196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
193 197
194 198 if os.path.exists(repo.sjoin(b"journal")):
195 199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
196 200
197 201 if ui.verbose or not self.revlogv1:
198 202 ui.status(
199 203 _(b"repository uses revlog format %d\n")
200 204 % (self.revlogv1 and 1 or 0)
201 205 )
202 206
203 207 # data verification
204 208 mflinkrevs, filelinkrevs = self._verifychangelog()
205 209 filenodes = self._verifymanifest(mflinkrevs)
206 210 del mflinkrevs
207 211 self._crosscheckfiles(filelinkrevs, filenodes)
208 212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
209 213
210 214 # final report
211 215 ui.status(
212 216 _(b"checked %d changesets with %d changes to %d files\n")
213 217 % (len(repo.changelog), filerevisions, totalfiles)
214 218 )
215 219 if self.warnings:
216 220 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
217 221 if self.fncachewarned:
218 222 ui.warn(HINT_FNCACHE)
219 223 if self.errors:
220 224 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
221 225 if self.badrevs:
222 226 msg = _(b"(first damaged changeset appears to be %d)\n")
223 227 msg %= min(self.badrevs)
224 228 ui.warn(msg)
225 229 return 1
226 230 return 0
227 231
228 232 def _verifychangelog(self):
229 233 """verify the changelog of a repository
230 234
231 235 The following checks are performed:
232 236 - all of `_checkrevlog` checks,
233 237 - all of `_checkentry` checks (for each revision),
234 238 - each revision can be read.
235 239
236 240 The function returns some of the data observed in the changesets as a
237 241 (mflinkrevs, filelinkrevs) tuple:
238 242 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
239 243 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
240 244
241 245 If a matcher was specified, filelinkrevs will only contain matched
242 246 files.
243 247 """
244 248 ui = self.ui
245 249 repo = self.repo
246 250 match = self.match
247 251 cl = repo.changelog
248 252
249 253 ui.status(_(b"checking changesets\n"))
250 254 mflinkrevs = {}
251 255 filelinkrevs = {}
252 256 seen = {}
253 257 self._checkrevlog(cl, b"changelog", 0)
254 258 progress = ui.makeprogress(
255 259 _(b'checking'), unit=_(b'changesets'), total=len(repo)
256 260 )
257 261 for i in repo:
258 262 progress.update(i)
259 263 n = cl.node(i)
260 264 self._checkentry(cl, i, n, seen, [i], b"changelog")
261 265
262 266 try:
263 267 changes = cl.read(n)
264 268 if changes[0] != self.repo.nullid:
265 269 mflinkrevs.setdefault(changes[0], []).append(i)
266 270 self.refersmf = True
267 271 for f in changes[3]:
268 272 if match(f):
269 273 filelinkrevs.setdefault(_normpath(f), []).append(i)
270 274 except Exception as inst:
271 275 self.refersmf = True
272 276 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
273 277 progress.complete()
274 278 return mflinkrevs, filelinkrevs
275 279
276 280 def _verifymanifest(
277 281 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
278 282 ):
279 283 """verify the manifestlog content
280 284
281 285 Inputs:
282 286 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
283 287 - dir: a subdirectory to check (for tree manifest repo)
284 288 - storefiles: set of currently "orphan" files.
285 289 - subdirprogress: a progress object
286 290
287 291 This function checks:
288 292 * all of `_checkrevlog` checks (for all manifest related revlogs)
289 293 * all of `_checkentry` checks (for all manifest related revisions)
290 294 * nodes for subdirectories exist in the sub-directory manifest
291 295 * each manifest entry has a file path
292 296 * each manifest node referred to in mflinkrevs exists in the manifest log
293 297
294 298 If tree manifest is in use and a matcher is specified, only the
295 299 sub-directories matching it will be verified.
296 300
297 301 return a two level mapping:
298 302 {"path" -> { filenode -> changelog-revision}}
299 303
300 304 This mapping primarily contains entries for every file in the
301 305 repository. In addition, when tree-manifest is used, it also contains
302 306 sub-directory entries.
303 307
304 308 If a matcher is provided, only matching paths will be included.
305 309 """
306 310 repo = self.repo
307 311 ui = self.ui
308 312 match = self.match
309 313 mfl = self.repo.manifestlog
310 314 mf = mfl.getstorage(dir)
311 315
312 316 if not dir:
313 317 self.ui.status(_(b"checking manifests\n"))
314 318
315 319 filenodes = {}
316 320 subdirnodes = {}
317 321 seen = {}
318 322 label = b"manifest"
319 323 if dir:
320 324 label = dir
321 325 revlogfiles = mf.files()
322 326 storefiles.difference_update(revlogfiles)
323 327 if subdirprogress: # should be true since we're in a subdirectory
324 328 subdirprogress.increment()
325 329 if self.refersmf:
326 330 # Do not check manifest if there are only changelog entries with
327 331 # null manifests.
328 332 self._checkrevlog(mf._revlog, label, 0)
329 333 progress = ui.makeprogress(
330 334 _(b'checking'), unit=_(b'manifests'), total=len(mf)
331 335 )
332 336 for i in mf:
333 337 if not dir:
334 338 progress.update(i)
335 339 n = mf.node(i)
336 340 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
337 341 if n in mflinkrevs:
338 342 del mflinkrevs[n]
339 343 elif dir:
340 344 msg = _(b"%s not in parent-directory manifest") % short(n)
341 345 self._err(lr, msg, label)
342 346 else:
343 347 self._err(lr, _(b"%s not in changesets") % short(n), label)
344 348
345 349 try:
346 350 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
347 351 for f, fn, fl in mfdelta.iterentries():
348 352 if not f:
349 353 self._err(lr, _(b"entry without name in manifest"))
350 354 elif f == b"/dev/null": # ignore this in very old repos
351 355 continue
352 356 fullpath = dir + _normpath(f)
353 357 if fl == b't':
354 358 if not match.visitdir(fullpath):
355 359 continue
356 360 sdn = subdirnodes.setdefault(fullpath + b'/', {})
357 361 sdn.setdefault(fn, []).append(lr)
358 362 else:
359 363 if not match(fullpath):
360 364 continue
361 365 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
362 366 except Exception as inst:
363 367 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
364 368 if self._level >= VERIFY_FULL:
365 369 try:
366 370 # Various issues can affect manifests, so we read each full
367 371 # text from storage. This triggers the checks from the core
368 372 # code (e.g. hash verification, filenames are ordered, etc.)
369 373 mfdelta = mfl.get(dir, n).read()
370 374 except Exception as inst:
371 375 msg = _(b"reading full manifest %s") % short(n)
372 376 self._exc(lr, msg, inst, label)
373 377
374 378 if not dir:
375 379 progress.complete()
376 380
377 381 if self.havemf:
378 382 # since we delete entries in `mflinkrevs` during iteration, any
379 383 # remaining entries are "missing". We need to issue errors for them.
380 384 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
381 385 for c, m in sorted(changesetpairs):
382 386 if dir:
383 387 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
384 388 else:
385 389 msg = _(b"changeset refers to unknown revision %s")
386 390 msg %= short(m)
387 391 self._err(c, msg, label)
388 392
389 393 if not dir and subdirnodes:
390 394 self.ui.status(_(b"checking directory manifests\n"))
391 395 storefiles = set()
392 396 subdirs = set()
393 397 revlogv1 = self.revlogv1
394 398 for t, f, f2, size in repo.store.datafiles():
395 399 if not f:
396 400 self._err(None, _(b"cannot decode filename '%s'") % f2)
397 401 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
398 402 storefiles.add(_normpath(f))
399 403 subdirs.add(os.path.dirname(f))
400 404 subdirprogress = ui.makeprogress(
401 405 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
402 406 )
403 407
404 408 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
405 409 subdirfilenodes = self._verifymanifest(
406 410 linkrevs, subdir, storefiles, subdirprogress
407 411 )
408 412 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
409 413 filenodes.setdefault(f, {}).update(onefilenodes)
410 414
411 415 if not dir and subdirnodes:
412 416 assert subdirprogress is not None # help pytype
413 417 subdirprogress.complete()
414 418 if self.warnorphanstorefiles:
415 419 for f in sorted(storefiles):
416 420 self._warn(_(b"warning: orphan data file '%s'") % f)
417 421
418 422 return filenodes
419 423
420 424 def _crosscheckfiles(self, filelinkrevs, filenodes):
421 425 repo = self.repo
422 426 ui = self.ui
423 427 ui.status(_(b"crosschecking files in changesets and manifests\n"))
424 428
425 429 total = len(filelinkrevs) + len(filenodes)
426 430 progress = ui.makeprogress(
427 431 _(b'crosschecking'), unit=_(b'files'), total=total
428 432 )
429 433 if self.havemf:
430 434 for f in sorted(filelinkrevs):
431 435 progress.increment()
432 436 if f not in filenodes:
433 437 lr = filelinkrevs[f][0]
434 438 self._err(lr, _(b"in changeset but not in manifest"), f)
435 439
436 440 if self.havecl:
437 441 for f in sorted(filenodes):
438 442 progress.increment()
439 443 if f not in filelinkrevs:
440 444 try:
441 445 fl = repo.file(f)
442 446 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
443 447 except Exception:
444 448 lr = None
445 449 self._err(lr, _(b"in manifest but not in changeset"), f)
446 450
447 451 progress.complete()
448 452
449 453 def _verifyfiles(self, filenodes, filelinkrevs):
450 454 repo = self.repo
451 455 ui = self.ui
452 456 lrugetctx = self.lrugetctx
453 457 revlogv1 = self.revlogv1
454 458 havemf = self.havemf
455 459 ui.status(_(b"checking files\n"))
456 460
457 461 storefiles = set()
458 462 for rl_type, f, f2, size in repo.store.datafiles():
459 463 if not f:
460 464 self._err(None, _(b"cannot decode filename '%s'") % f2)
461 465 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
462 466 storefiles.add(_normpath(f))
463 467
464 468 state = {
465 469 # TODO this assumes revlog storage for changelog.
466 470 b'expectedversion': self.repo.changelog._format_version,
467 471 b'skipflags': self.skipflags,
468 472 # experimental config: censor.policy
469 473 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
470 474 }
471 475
472 476 files = sorted(set(filenodes) | set(filelinkrevs))
473 477 revisions = 0
474 478 progress = ui.makeprogress(
475 479 _(b'checking'), unit=_(b'files'), total=len(files)
476 480 )
477 481 for i, f in enumerate(files):
478 482 progress.update(i, item=f)
479 483 try:
480 484 linkrevs = filelinkrevs[f]
481 485 except KeyError:
482 486 # in manifest but not in changelog
483 487 linkrevs = []
484 488
485 489 if linkrevs:
486 490 lr = linkrevs[0]
487 491 else:
488 492 lr = None
489 493
490 494 try:
491 495 fl = repo.file(f)
492 496 except error.StorageError as e:
493 497 self._err(lr, _(b"broken revlog! (%s)") % e, f)
494 498 continue
495 499
496 500 for ff in fl.files():
497 501 try:
498 502 storefiles.remove(ff)
499 503 except KeyError:
500 504 if self.warnorphanstorefiles:
501 505 msg = _(b" warning: revlog '%s' not in fncache!")
502 506 self._warn(msg % ff)
503 507 self.fncachewarned = True
504 508
505 509 if not len(fl) and (self.havecl or self.havemf):
506 510 self._err(lr, _(b"empty or missing %s") % f)
507 511 else:
508 512 # Guard against implementations not setting this.
509 513 state[b'skipread'] = set()
510 514 state[b'safe_renamed'] = set()
511 515
512 516 for problem in fl.verifyintegrity(state):
513 517 if problem.node is not None:
514 518 linkrev = fl.linkrev(fl.rev(problem.node))
515 519 else:
516 520 linkrev = None
517 521
518 522 if problem.warning:
519 523 self._warn(problem.warning)
520 524 elif problem.error:
521 525 linkrev_msg = linkrev if linkrev is not None else lr
522 526 self._err(linkrev_msg, problem.error, f)
523 527 else:
524 528 raise error.ProgrammingError(
525 529 b'problem instance does not set warning or error '
526 530 b'attribute: %s' % problem.msg
527 531 )
528 532
529 533 seen = {}
530 534 for i in fl:
531 535 revisions += 1
532 536 n = fl.node(i)
533 537 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
534 538 if f in filenodes:
535 539 if havemf and n not in filenodes[f]:
536 540 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
537 541 else:
538 542 del filenodes[f][n]
539 543
540 544 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
541 545 continue
542 546
543 547 # check renames
544 548 try:
545 549 # This requires resolving fulltext (at least on revlogs,
546 550 # though not with LFS revisions). We may want
547 551 # ``verifyintegrity()`` to pass a set of nodes with
548 552 # rename metadata as an optimization.
549 553 rp = fl.renamed(n)
550 554 if rp:
551 555 if lr is not None and ui.verbose:
552 556 ctx = lrugetctx(lr)
553 557 if not any(rp[0] in pctx for pctx in ctx.parents()):
554 558 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
555 559 fl2 = repo.file(rp[0])
556 560 if not len(fl2):
557 561 m = _(b"empty or missing copy source revlog %s:%s")
558 562 self._err(lr, m % (rp[0], short(rp[1])), f)
559 563 elif rp[1] == self.repo.nullid:
560 ui.note(
561 _(
562 b"warning: %s@%s: copy source"
563 b" revision is nullid %s:%s\n"
564 )
565 % (f, lr, rp[0], short(rp[1]))
566 )
564 msg = WARN_NULLID_COPY_SOURCE
565 msg %= (f, lr, rp[0], short(rp[1]))
566 ui.note(msg)
567 567 else:
568 568 fl2.rev(rp[1])
569 569 except Exception as inst:
570 570 self._exc(
571 571 lr, _(b"checking rename of %s") % short(n), inst, f
572 572 )
573 573
574 574 # cross-check
575 575 if f in filenodes:
576 576 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
577 577 for lr, node in sorted(fns):
578 578 self._err(
579 579 lr,
580 580 _(b"manifest refers to unknown revision %s")
581 581 % short(node),
582 582 f,
583 583 )
584 584 progress.complete()
585 585
586 586 if self.warnorphanstorefiles:
587 587 for f in sorted(storefiles):
588 588 self._warn(_(b"warning: orphan data file '%s'") % f)
589 589
590 590 return len(files), revisions
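
For context, the module's entry point is the verify(repo, level=None) function near the top of the file; the hg verify command ultimately runs this code. Below is a minimal sketch of calling it directly from Python, assuming Mercurial is importable and a local repository exists at the placeholder path /path/to/repo (the normal way to run this check remains hg verify):

    # hypothetical driver script; the repository path is a placeholder
    from mercurial import hg, ui as uimod, verify

    u = uimod.ui.load()                        # default ui / config object
    repo = hg.repository(u, b'/path/to/repo')  # open the local repository
    # returns 1 if any integrity error was encountered, 0 otherwise;
    # pass verify.VERIFY_FULL to also read every full manifest text
    rc = verify.verify(repo)
    print(rc)
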