##// END OF EJS Templates
revlog: move revision verification out of verify...
Gregory Szorc -
r39908:733db72f default
parent child Browse files
Show More
@@ -53,7 +53,7 b' class filelog(object):'
53 def linkrev(self, rev):
53 def linkrev(self, rev):
54 return self._revlog.linkrev(rev)
54 return self._revlog.linkrev(rev)
55
55
56 # Used by verify.
56 # Unused.
57 def flags(self, rev):
57 def flags(self, rev):
58 return self._revlog.flags(rev)
58 return self._revlog.flags(rev)
59
59
@@ -77,7 +77,7 b' class filelog(object):'
77 def iscensored(self, rev):
77 def iscensored(self, rev):
78 return self._revlog.iscensored(rev)
78 return self._revlog.iscensored(rev)
79
79
80 # Used by repo verify.
80 # Unused.
81 def rawsize(self, rev):
81 def rawsize(self, rev):
82 return self._revlog.rawsize(rev)
82 return self._revlog.rawsize(rev)
83
83
@@ -341,6 +341,12 b' class iverifyproblem(interfaceutil.Inter'
341 error = interfaceutil.Attribute(
341 error = interfaceutil.Attribute(
342 """Message indicating a fatal problem.""")
342 """Message indicating a fatal problem.""")
343
343
344 node = interfaceutil.Attribute(
345 """Node of the revision encountering the problem.
346
347 ``None`` means the problem doesn't apply to a single revision.
348 """)
349
344 class irevisiondelta(interfaceutil.Interface):
350 class irevisiondelta(interfaceutil.Interface):
345 """Represents a delta between one revision and another.
351 """Represents a delta between one revision and another.
346
352
@@ -790,6 +796,10 b' class ifilestorage(ifileindex, ifiledata'
790 used to communicate data between invocations of multiple storage
796 used to communicate data between invocations of multiple storage
791 primitives.
797 primitives.
792
798
799 If individual revisions cannot have their revision content resolved,
800 the method is expected to set the ``skipread`` key of ``state`` to the
801 set of nodes that encountered problems.
802
793 The method yields objects conforming to the ``iverifyproblem``
803 The method yields objects conforming to the ``iverifyproblem``
794 interface.
804 interface.
795 """
805 """
@@ -29,6 +29,7 b' from .node import ('
29 nullhex,
29 nullhex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 short,
32 wdirfilenodeids,
33 wdirfilenodeids,
33 wdirhex,
34 wdirhex,
34 wdirid,
35 wdirid,
@@ -260,6 +261,7 b' class revlogrevisiondelta(object):'
260 class revlogproblem(object):
261 class revlogproblem(object):
261 warning = attr.ib(default=None)
262 warning = attr.ib(default=None)
262 error = attr.ib(default=None)
263 error = attr.ib(default=None)
264 node = attr.ib(default=None)
263
265
264 # index v0:
266 # index v0:
265 # 4 bytes: offset
267 # 4 bytes: offset
@@ -2644,6 +2646,89 b' class revlog(object):'
2644 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2646 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2645 (self.indexfile, version, state['expectedversion']))
2647 (self.indexfile, version, state['expectedversion']))
2646
2648
2649 state['skipread'] = set()
2650
2651 for rev in self:
2652 node = self.node(rev)
2653
2654 # Verify contents. 4 cases to care about:
2655 #
2656 # common: the most common case
2657 # rename: with a rename
2658 # meta: file content starts with b'\1\n', the metadata
2659 # header defined in filelog.py, but without a rename
2660 # ext: content stored externally
2661 #
2662 # More formally, their differences are shown below:
2663 #
2664 # | common | rename | meta | ext
2665 # -------------------------------------------------------
2666 # flags() | 0 | 0 | 0 | not 0
2667 # renamed() | False | True | False | ?
2668 # rawtext[0:2]=='\1\n'| False | True | True | ?
2669 #
2670 # "rawtext" means the raw text stored in revlog data, which
2671 # could be retrieved by "revision(rev, raw=True)". "text"
2672 # mentioned below is "revision(rev, raw=False)".
2673 #
2674 # There are 3 different lengths stored physically:
2675 # 1. L1: rawsize, stored in revlog index
2676 # 2. L2: len(rawtext), stored in revlog data
2677 # 3. L3: len(text), stored in revlog data if flags==0, or
2678 # possibly somewhere else if flags!=0
2679 #
2680 # L1 should be equal to L2. L3 could be different from them.
2681 # "text" may or may not affect commit hash depending on flag
2682 # processors (see revlog.addflagprocessor).
2683 #
2684 # | common | rename | meta | ext
2685 # -------------------------------------------------
2686 # rawsize() | L1 | L1 | L1 | L1
2687 # size() | L1 | L2-LM | L1(*) | L1 (?)
2688 # len(rawtext) | L2 | L2 | L2 | L2
2689 # len(text) | L2 | L2 | L2 | L3
2690 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2691 #
2692 # LM: length of metadata, depending on rawtext
2693 # (*): not ideal, see comment in filelog.size
2694 # (?): could be "- len(meta)" if the resolved content has
2695 # rename metadata
2696 #
2697 # Checks needed to be done:
2698 # 1. length check: L1 == L2, in all cases.
2699 # 2. hash check: depending on flag processor, we may need to
2700 # use either "text" (external), or "rawtext" (in revlog).
2701
2702 try:
2703 skipflags = state.get('skipflags', 0)
2704 if skipflags:
2705 skipflags &= self.flags(rev)
2706
2707 if skipflags:
2708 state['skipread'].add(node)
2709 else:
2710 # Side-effect: read content and verify hash.
2711 self.revision(node)
2712
2713 l1 = self.rawsize(rev)
2714 l2 = len(self.revision(node, raw=True))
2715
2716 if l1 != l2:
2717 yield revlogproblem(
2718 error=_('unpacked size is %d, %d expected') % (l2, l1),
2719 node=node)
2720
2721 except error.CensoredNodeError:
2722 if state['erroroncensored']:
2723 yield revlogproblem(error=_('censored file data'),
2724 node=node)
2725 state['skipread'].add(node)
2726 except Exception as e:
2727 yield revlogproblem(
2728 error=_('unpacking %s: %s') % (short(node), e),
2729 node=node)
2730 state['skipread'].add(node)
2731
2647 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2732 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2648 revisionscount=False, trackedsize=False,
2733 revisionscount=False, trackedsize=False,
2649 storedsize=False):
2734 storedsize=False):
@@ -343,7 +343,10 b' class verifier(object):'
343
343
344 state = {
344 state = {
345 # TODO this assumes revlog storage for changelog.
345 # TODO this assumes revlog storage for changelog.
346 'expectedversion': self.repo.changelog.version & 0xFFFF
346 'expectedversion': self.repo.changelog.version & 0xFFFF,
347 'skipflags': self.skipflags,
348 # experimental config: censor.policy
349 'erroroncensored': ui.config('censor', 'policy') == 'abort',
347 }
350 }
348
351
349 files = sorted(set(filenodes) | set(filelinkrevs))
352 files = sorted(set(filenodes) | set(filelinkrevs))
@@ -381,18 +384,25 b' class verifier(object):'
381 if not len(fl) and (self.havecl or self.havemf):
384 if not len(fl) and (self.havecl or self.havemf):
382 self.err(lr, _("empty or missing %s") % f)
385 self.err(lr, _("empty or missing %s") % f)
383 else:
386 else:
387 # Guard against implementations not setting this.
388 state['skipread'] = set()
384 for problem in fl.verifyintegrity(state):
389 for problem in fl.verifyintegrity(state):
390 if problem.node is not None:
391 linkrev = fl.linkrev(fl.rev(problem.node))
392 else:
393 linkrev = None
394
385 if problem.warning:
395 if problem.warning:
386 self.warn(problem.warning)
396 self.warn(problem.warning)
387 elif problem.error:
397 elif problem.error:
388 self.err(lr, problem.error, f)
398 self.err(linkrev if linkrev is not None else lr,
399 problem.error, f)
389 else:
400 else:
390 raise error.ProgrammingError(
401 raise error.ProgrammingError(
391 'problem instance does not set warning or error '
402 'problem instance does not set warning or error '
392 'attribute: %s' % problem.msg)
403 'attribute: %s' % problem.msg)
393
404
394 seen = {}
405 seen = {}
395 rp = None
396 for i in fl:
406 for i in fl:
397 revisions += 1
407 revisions += 1
398 n = fl.node(i)
408 n = fl.node(i)
@@ -403,75 +413,15 b' class verifier(object):'
403 else:
413 else:
404 del filenodes[f][n]
414 del filenodes[f][n]
405
415
406 # Verify contents. 4 cases to care about:
416 if n in state['skipread']:
407 #
417 continue
408 # common: the most common case
409 # rename: with a rename
410 # meta: file content starts with b'\1\n', the metadata
411 # header defined in filelog.py, but without a rename
412 # ext: content stored externally
413 #
414 # More formally, their differences are shown below:
415 #
416 # | common | rename | meta | ext
417 # -------------------------------------------------------
418 # flags() | 0 | 0 | 0 | not 0
419 # renamed() | False | True | False | ?
420 # rawtext[0:2]=='\1\n'| False | True | True | ?
421 #
422 # "rawtext" means the raw text stored in revlog data, which
423 # could be retrieved by "revision(rev, raw=True)". "text"
424 # mentioned below is "revision(rev, raw=False)".
425 #
426 # There are 3 different lengths stored physically:
427 # 1. L1: rawsize, stored in revlog index
428 # 2. L2: len(rawtext), stored in revlog data
429 # 3. L3: len(text), stored in revlog data if flags==0, or
430 # possibly somewhere else if flags!=0
431 #
432 # L1 should be equal to L2. L3 could be different from them.
433 # "text" may or may not affect commit hash depending on flag
434 # processors (see revlog.addflagprocessor).
435 #
436 # | common | rename | meta | ext
437 # -------------------------------------------------
438 # rawsize() | L1 | L1 | L1 | L1
439 # size() | L1 | L2-LM | L1(*) | L1 (?)
440 # len(rawtext) | L2 | L2 | L2 | L2
441 # len(text) | L2 | L2 | L2 | L3
442 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
443 #
444 # LM: length of metadata, depending on rawtext
445 # (*): not ideal, see comment in filelog.size
446 # (?): could be "- len(meta)" if the resolved content has
447 # rename metadata
448 #
449 # Checks needed to be done:
450 # 1. length check: L1 == L2, in all cases.
451 # 2. hash check: depending on flag processor, we may need to
452 # use either "text" (external), or "rawtext" (in revlog).
453 try:
454 skipflags = self.skipflags
455 if skipflags:
456 skipflags &= fl.flags(i)
457 if not skipflags:
458 fl.read(n) # side effect: read content and do checkhash
459 rp = fl.renamed(n)
460 # the "L1 == L2" check
461 l1 = fl.rawsize(i)
462 l2 = len(fl.revision(n, raw=True))
463 if l1 != l2:
464 self.err(lr, _("unpacked size is %s, %s expected") %
465 (l2, l1), f)
466 except error.CensoredNodeError:
467 # experimental config: censor.policy
468 if ui.config("censor", "policy") == "abort":
469 self.err(lr, _("censored file data"), f)
470 except Exception as inst:
471 self.exc(lr, _("unpacking %s") % short(n), inst, f)
472
418
473 # check renames
419 # check renames
474 try:
420 try:
421 # This requires resolving fulltext (at least on revlogs). We
422 # may want ``verifyintegrity()`` to pass a set of nodes with
423 # rename metadata as an optimization.
424 rp = fl.renamed(n)
475 if rp:
425 if rp:
476 if lr is not None and ui.verbose:
426 if lr is not None and ui.verbose:
477 ctx = lrugetctx(lr)
427 ctx = lrugetctx(lr)
General Comments 0
You need to be logged in to leave comments. Login now