# HG changeset patch # User Martin von Zweigbergk # Date 2019-02-19 23:42:45 # Node ID 2d562af6014bbe9a094746c275a9b8a4ebeb8b4f # Parent 456ad0fd8e188373f9fb4e42b80caaa36a3db932 copies: do copy tracing based on ctx.p[12]copies() if configured This adds an option to do copy tracing in a changeset-optimized way. If the metadata is stored in filelogs, this is obviously going to be suboptimal. The point is that it provides a way of transitioning to changeset-stored metadata. Some of the tests behave a little differently, but they all seem reasonable to me. The config option may very well be renamed later when it's clearer what options we want and how they will behave. When the test suite is run with --extra-config-opt to use the new copy tracing, all tests pass, besides test-copies.t (which fails in the same way as you can see in this patch). `hg debugpathcopies 4.0 4.8` reports 82 copies. With this option enabled, the only difference is this: -mercurial/pure/bdiff.py -> mercurial/cffi/bdiff.py +setup_bdiff_cffi.py -> mercurial/cffi/bdiff.py I believe that happened because it was renamed in different ways on different sides of a merge and the new algorithm arbitrarily prefers copies that happened on p1. The runtime is about 0.85 seconds with the old copy tracing and 5.7 seconds with the new copy tracing. That's kind of slow, but actually better than I had expected. 
Differential Revision: https://phab.mercurial-scm.org/D5991 diff --git a/mercurial/configitems.py b/mercurial/configitems.py --- a/mercurial/configitems.py +++ b/mercurial/configitems.py @@ -482,6 +482,9 @@ coreconfigitem('experimental', 'copytrac coreconfigitem('experimental', 'copytrace.sourcecommitlimit', default=100, ) +coreconfigitem('experimental', 'copies.read-from', + default="filelog-only", +) coreconfigitem('experimental', 'crecordtest', default=None, ) diff --git a/mercurial/copies.py b/mercurial/copies.py --- a/mercurial/copies.py +++ b/mercurial/copies.py @@ -166,6 +166,10 @@ def _committedforwardcopies(a, b, match) # files might have to be traced back to the fctx parent of the last # one-side-only changeset, but not further back than that repo = a._repo + + if repo.ui.config('experimental', 'copies.read-from') == 'compatibility': + return _changesetforwardcopies(a, b, match) + debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies') dbg = repo.ui.debug if debug: @@ -216,6 +220,76 @@ def _committedforwardcopies(a, b, match) % (util.timer() - start)) return cm +def _changesetforwardcopies(a, b, match): + if a.rev() == node.nullrev: + return {} + + repo = a.repo() + children = {} + cl = repo.changelog + missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()]) + for r in missingrevs: + for p in cl.parentrevs(r): + if p == node.nullrev: + continue + if p not in children: + children[p] = [r] + else: + children[p].append(r) + + roots = set(children) - set(missingrevs) + # 'work' contains 3-tuples of a (revision number, parent number, copies). + # The parent number is only used for knowing which parent the copies dict + # came from. 
+ work = [(r, 1, {}) for r in roots] + heapq.heapify(work) + while work: + r, i1, copies1 = heapq.heappop(work) + if work and work[0][0] == r: + # We are tracing copies from both parents + r, i2, copies2 = heapq.heappop(work) + copies = {} + ctx = repo[r] + p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest() + allcopies = set(copies1) | set(copies2) + # TODO: perhaps this filtering should be done as long as ctx + # is merge, whether or not we're tracing from both parent. + for dst in allcopies: + if not match(dst): + continue + if dst not in copies2: + # Copied on p1 side: mark as copy from p1 side if it didn't + # already exist on p2 side + if dst not in p2man: + copies[dst] = copies1[dst] + elif dst not in copies1: + # Copied on p2 side: mark as copy from p2 side if it didn't + # already exist on p1 side + if dst not in p1man: + copies[dst] = copies2[dst] + else: + # Copied on both sides: mark as copy from p1 side + copies[dst] = copies1[dst] + else: + copies = copies1 + if r == b.rev(): + return copies + for c in children[r]: + childctx = repo[c] + if r == childctx.p1().rev(): + parent = 1 + childcopies = childctx.p1copies() + else: + assert r == childctx.p2().rev() + parent = 2 + childcopies = childctx.p2copies() + if not match.always(): + childcopies = {dst: src for dst, src in childcopies.items() + if match(dst)} + childcopies = _chain(a, childctx, copies, childcopies) + heapq.heappush(work, (c, parent, childcopies)) + assert False + def _forwardcopies(a, b, match=None): """find {dst@b: src@a} copy mapping where a is an ancestor of b""" diff --git a/tests/test-copies.t b/tests/test-copies.t --- a/tests/test-copies.t +++ b/tests/test-copies.t @@ -1,9 +1,17 @@ +#testcases filelog compatibility $ cat >> $HGRCPATH << EOF > [alias] > l = log -G -T '{rev} {desc}\n{files}\n' > EOF +#if compatibility + $ cat >> $HGRCPATH << EOF + > [experimental] + > copies.read-from = compatibility + > EOF +#endif + $ REPONUM=0 $ newrepo() { > cd $TESTTMP @@ -338,7 +346,7 @@ 
It's a little weird that it shows up on $ hg debugpathcopies 1 2 x -> z $ hg debugpathcopies 0 2 - x -> z + x -> z (filelog !) Copy file that exists on both sides of the merge, different content $ newrepo @@ -476,7 +484,8 @@ Try merging the other direction too $ hg debugpathcopies 1 4 $ hg debugpathcopies 2 4 $ hg debugpathcopies 0 4 - x -> z + x -> z (filelog !) + y -> z (compatibility !) $ hg debugpathcopies 1 5 $ hg debugpathcopies 2 5 $ hg debugpathcopies 0 5