# HG changeset patch # User Phil Cohen # Date 2017-10-17 19:41:24 # Node ID 14c87708f4328fa46938be99d6deba678f3455aa # Parent 2e8477059d4f34927600781296704cc87485fb7e arbitraryfilecontext: skip the cmp fast path if any side is a symlink `filecmp` follows symlinks by default, which a `filectx.cmp()` call should not be doing as it should only compare the requested entry. After this patch, only the contexts' data are compared, which is the correct contract. This is a corrected version of D1122. Differential Revision: https://phab.mercurial-scm.org/D1165 diff --git a/mercurial/context.py b/mercurial/context.py --- a/mercurial/context.py +++ b/mercurial/context.py @@ -2570,9 +2570,13 @@ class arbitraryfilectx(object): self._path = path def cmp(self, fctx): - if isinstance(fctx, workingfilectx) and self._repo: + # filecmp follows symlinks whereas `cmp` should not, so skip the fast + # path if either side is a symlink. + symlinks = ('l' in self.flags() or 'l' in fctx.flags()) + if not symlinks and isinstance(fctx, workingfilectx) and self._repo: # Add a fast-path for merge if both sides are disk-backed. - # Note that filecmp uses the opposite return values as cmp. + # Note that filecmp uses the opposite return values (True if same) + # from our cmp functions (True if different). return not filecmp.cmp(self.path(), self._repo.wjoin(fctx.path())) return self.data() != fctx.data() diff --git a/tests/test-arbitraryfilectx.t b/tests/test-arbitraryfilectx.t new file mode 100644 --- /dev/null +++ b/tests/test-arbitraryfilectx.t @@ -0,0 +1,57 @@ +Setup: + $ cat > eval.py < from __future__ import absolute_import + > import filecmp + > from mercurial import commands, context, registrar + > cmdtable = {} + > command = registrar.command(cmdtable) + > @command(b'eval', [], 'hg eval CMD') + > def eval_(ui, repo, *cmds, **opts): + > cmd = " ".join(cmds) + > res = str(eval(cmd, globals(), locals())) + > ui.warn("%s" % res) + > EOF + + $ echo "[extensions]" >> $HGRCPATH + $ echo "eval=`pwd`/eval.py" >> $HGRCPATH + +Arbitraryfilectx.cmp does not follow symlinks: + $ mkdir case1 + $ cd case1 + $ hg init + $ printf "A" > real_A + $ printf "foo" > A + $ printf "foo" > B + $ ln -s A sym_A + $ hg add . + adding A + adding B + adding real_A + adding sym_A + $ hg commit -m "base" + +These files are different and should return True (different): +(Note that filecmp.cmp's return semantics are inverted from ours, so we invert +for simplicity): + $ hg eval "context.arbitraryfilectx('A', repo).cmp(repo[None]['real_A'])" + True (no-eol) + $ hg eval "not filecmp.cmp('A', 'real_A')" + True (no-eol) + +These files are identical and should return False (same): + $ hg eval "context.arbitraryfilectx('A', repo).cmp(repo[None]['A'])" + False (no-eol) + $ hg eval "context.arbitraryfilectx('A', repo).cmp(repo[None]['B'])" + False (no-eol) + $ hg eval "not filecmp.cmp('A', 'B')" + False (no-eol) + +This comparison should also return False, since A and sym_A are substantially +the same in the eyes of ``filectx.cmp``, which looks at data only. + $ hg eval "context.arbitraryfilectx('real_A', repo).cmp(repo[None]['sym_A'])" + False (no-eol) + +A naive use of filecmp on those two would wrongly return True, since it follows +the symlink to "A", which has different contents. + $ hg eval "not filecmp.cmp('real_A', 'sym_A')" + True (no-eol)