# HG changeset patch
# User Mike Edgar
# Date 2014-09-04 02:14:20
# Node ID 27e2317efe89f995b14d1521c476e5d6cc1ee49b
# Parent 244478687edd30038f336814ea03a54e99388946
filelog: raise CensoredNodeError when hash checks fail with censor metadata

With this change, when a revlog revision hash does not match its content, and
the content is empty with a special metadata key, the integrity failure is
assumed to be intentionally caused to remove sensitive content from repository
history. To allow different Mercurial functionality to handle this scenario
differently a more specific exception is raised than "ordinary" hash failures.

Alternatives to this approach include, but are not limited to:

- Calling a hook when hashes mismatch to allow arbitrary tombstone validation.
  Cons: Irresponsibly easy to disable integrity checking altogether.

- Returning empty revision data eagerly instead of raising, masking the error.
  Cons: Push/pull won't roundtrip the tombstone, so client repos are unusable.

- Doing nothing differently at this layer. Callers must do their own detection
  of tombstoned data if they want to handle some hash checks and not others.
  - Impacts dozens of callsites, many of which don't have the revision data
  - Would probably be missing one or two callsites at any given time
  - Currently we throw a RevlogError, as do 12 other places in revlog.py.
    Callers would need to parse the exception message and/or ensure
    RevlogError is not thrown from any other part of their call tree.

diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import revlog
+import error, revlog
 import re
 
 _mdre = re.compile('\1\n')
@@ -27,6 +27,10 @@ def packmeta(meta, text):
     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
     return "\1\n%s\1\n%s" % (metatext, text)
 
+def _censoredtext(text):
+    m, offs = parsemeta(text)
+    return m and "censored" in m and not text[offs:]
+
 class filelog(revlog.revlog):
     def __init__(self, opener, path):
         super(filelog, self).__init__(opener,
@@ -86,5 +90,13 @@ class filelog(revlog.revlog):
 
         return True
 
+    def checkhash(self, text, p1, p2, node, rev=None):
+        try:
+            super(filelog, self).checkhash(text, p1, p2, node, rev=rev)
+        except error.RevlogError:
+            if _censoredtext(text):
+                raise error.CensoredNodeError(self.indexfile, node)
+            raise
+
     def _file(self, f):
         return filelog(self.opener, f)