# HG changeset patch # User Jun Wu # Date 2017-08-28 21:49:00 # Node ID c6c8a52e28c9077580dd2f0552eb2bd6d5e0d13c # Parent 8b659b7388c0410ec690cc27bce5e1f5c503c599 revset: optimize "draft() & ::x" pattern The `draft() & ::x` type query could be common for selecting one or more draft feature branches being worked on. Before this patch, `::x` may travel through the changelog DAG for a long distance until it gets a smaller revision number than `min(draft())`. It could be very slow on long changelog with distant (in terms of revision numbers) drafts. This patch adds a fast path for this situation, and will stop traveling the changelog DAG once `::x` hits a non-draft revision. The fast path also works for `secret()` and `not public()`. To measure the performance difference, I used drawdag to create a repo that emulates distant drafts: DRAFT4 | DRAFT3 # draft / PUBLIC9999 # public | PUBLIC9998 | . DRAFT2 . | . DRAFT1 # draft | / PUBLIC0001 # public And measured the performance using the repo: (BEFORE) $ hg perfrevset 'draft() & ::(DRAFT2+DRAFT4)' ! wall 0.017132 comb 0.010000 user 0.010000 sys 0.000000 (best of 156) $ hg perfrevset 'draft() & ::(all())' ! wall 0.024221 comb 0.030000 user 0.030000 sys 0.000000 (best of 113) (AFTER) $ hg perfrevset 'draft() & ::(DRAFT2+DRAFT4)' ! wall 0.000243 comb 0.000000 user 0.000000 sys 0.000000 (best of 9303) $ hg perfrevset 'draft() & ::(all())' ! wall 0.004319 comb 0.000000 user 0.000000 sys 0.000000 (best of 655) Differential Revision: https://phab.mercurial-scm.org/D441 diff --git a/mercurial/dagop.py b/mercurial/dagop.py --- a/mercurial/dagop.py +++ b/mercurial/dagop.py @@ -75,27 +75,49 @@ def _walkrevtree(pfunc, revs, startdepth if prev != node.nullrev: heapq.heappush(pendingheap, (heapsign * prev, pdepth)) -def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth): +def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc): if followfirst: cut = 1 else: cut = None cl = repo.changelog - def pfunc(rev): + def plainpfunc(rev): try: return cl.parentrevs(rev)[:cut] except error.WdirUnsupported: return (pctx.rev() for pctx in repo[rev].parents()[:cut]) + if cutfunc is None: + pfunc = plainpfunc + else: + pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)] + revs = revs.filter(lambda rev: not cutfunc(rev)) return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True) -def revancestors(repo, revs, followfirst, startdepth=None, stopdepth=None): +def revancestors(repo, revs, followfirst=False, startdepth=None, + stopdepth=None, cutfunc=None): """Like revlog.ancestors(), but supports additional options, includes the given revs themselves, and returns a smartset Scan ends at the stopdepth (exlusive) if specified. Revisions found earlier than the startdepth are omitted. + + If cutfunc is provided, it will be used to cut the traversal of the DAG. + When cutfunc(X) returns True, the DAG traversal stops - revision X and + X's ancestors in the traversal path will be skipped. This could be an + optimization sometimes. + + Note: if Y is an ancestor of X, cutfunc(X) returning True does not + necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to + return True in this case. For example, + + D # revancestors(repo, D, cutfunc=lambda rev: rev == B) + |\ # will include "A", because the path D -> C -> A was not cut. + B C # If "B" gets cut, "A" might want to be cut too. + |/ + A """ - gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth) + gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, + cutfunc) return generatorset(gen, iterasc=False) def _genrevdescendants(repo, revs, followfirst): diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -1577,6 +1577,37 @@ def _notpublic(repo, subset, x): getargs(x, 0, 0, "_notpublic takes no arguments") return _phase(repo, subset, phases.draft, phases.secret) +# for internal use +@predicate('_phaseandancestors(phasename, set)', safe=True) +def _phaseandancestors(repo, subset, x): + # equivalent to (phasename() & ancestors(set)) but more efficient + # phasename could be one of 'draft', 'secret', or '_notpublic' + args = getargs(x, 2, 2, "_phaseandancestors requires two arguments") + phasename = getsymbol(args[0]) + s = getset(repo, fullreposet(repo), args[1]) + + draft = phases.draft + secret = phases.secret + phasenamemap = { + '_notpublic': draft, + 'draft': draft, # follow secret's ancestors + 'secret': secret, + } + if phasename not in phasenamemap: + raise error.ParseError('%r is not a valid phasename' % phasename) + + minimalphase = phasenamemap[phasename] + getphase = repo._phasecache.phase + + def cutfunc(rev): + return getphase(repo, rev) < minimalphase + + revs = dagop.revancestors(repo, s, cutfunc=cutfunc) + + if phasename == 'draft': # need to remove secret changesets + revs = revs.filter(lambda r: getphase(repo, r) == draft) + return subset & revs + @predicate('public()', safe=True) def public(repo, subset, x): """Changeset in public phase.""" diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py --- a/mercurial/revsetlang.py +++ b/mercurial/revsetlang.py @@ -369,6 +369,11 @@ def _optimize(x, small): wb, tb = _optimize(x[2], True) w = min(wa, wb) + # (draft/secret/_notpublic() & ::x) have a fast path + m = _match('_() & ancestors(_)', ('and', ta, tb)) + if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}: + return w, _build('_phaseandancestors(_, _)', m[1], m[2]) + # (::x and not ::y)/(not ::y and ::x) have a fast path m = _matchonly(ta, tb) or _matchonly(tb, ta) if m: diff --git a/tests/test-revset.t b/tests/test-revset.t --- a/tests/test-revset.t +++ b/tests/test-revset.t @@ -4321,3 +4321,147 @@ Test obsstore related revsets $ hg log -r 'successors(B+A)-contentdivergent()-obsolete()' -T '{desc}\n' Z + +Test `draft() & ::x` optimization + + $ hg init $TESTTMP/repo2 + $ cd $TESTTMP/repo2 + $ hg debugdrawdag <<'EOS' + > P5 S1 + > | | + > S2 | D3 + > \|/ + > P4 + > | + > P3 D2 + > | | + > P2 D1 + > |/ + > P1 + > | + > P0 + > EOS + $ hg phase --public -r P5 + $ hg phase --force --secret -r S1+S2 + $ hg log -G -T '{rev} {desc} {phase}' -r 'sort(all(), topo, topo.firstbranch=P5)' + o 8 P5 public + | + | o 10 S1 secret + | | + | o 7 D3 draft + |/ + | o 9 S2 secret + |/ + o 6 P4 public + | + o 5 P3 public + | + o 3 P2 public + | + | o 4 D2 draft + | | + | o 2 D1 draft + |/ + o 1 P1 public + | + o 0 P0 public + + $ hg debugrevspec --verify -p analyzed -p optimized 'draft() & ::(((S1+D1+P5)-D3)+S2)' + * analyzed: + (and + (func + ('symbol', 'draft') + None) + (func + ('symbol', 'ancestors') + (or + (list + (and + (or + (list + ('symbol', 'S1') + ('symbol', 'D1') + ('symbol', 'P5'))) + (not + ('symbol', 'D3'))) + ('symbol', 'S2'))))) + * optimized: + (func + ('symbol', '_phaseandancestors') + (list + ('symbol', 'draft') + (or + (list + (difference + (func + ('symbol', '_list') + ('string', 'S1\x00D1\x00P5')) + ('symbol', 'D3')) + ('symbol', 'S2'))))) + $ hg debugrevspec --verify -p analyzed -p optimized 'secret() & ::9' + * analyzed: + (and + (func + ('symbol', 'secret') + None) + (func + ('symbol', 'ancestors') + ('symbol', '9'))) + * optimized: + (func + ('symbol', '_phaseandancestors') + (list + ('symbol', 'secret') + ('symbol', '9'))) + $ hg debugrevspec --verify -p analyzed -p optimized '7 & ( (not public()) & ::(tag()) )' + * analyzed: + (and + ('symbol', '7') + (and + (not + (func + ('symbol', 'public') + None)) + (func + ('symbol', 'ancestors') + (func + ('symbol', 'tag') + None)))) + * optimized: + (and + ('symbol', '7') + (func + ('symbol', '_phaseandancestors') + (list + ('symbol', '_notpublic') + (func + ('symbol', 'tag') + None)))) + $ hg debugrevspec --verify -p optimized '(not public()) & ancestors(S1+D2+P5, 1)' + * optimized: + (and + (func + ('symbol', '_notpublic') + None) + (func + ('symbol', 'ancestors') + (list + (func + ('symbol', '_list') + ('string', 'S1\x00D2\x00P5')) + ('symbol', '1')))) + $ hg debugrevspec --verify -p optimized '(not public()) & ancestors(S1+D2+P5, depth=1)' + * optimized: + (and + (func + ('symbol', '_notpublic') + None) + (func + ('symbol', 'ancestors') + (list + (func + ('symbol', '_list') + ('string', 'S1\x00D2\x00P5')) + (keyvalue + ('symbol', 'depth') + ('symbol', '1')))))