# HG changeset patch # User Mads Kiilerich # Date 2015-01-07 23:01:03 # Node ID 678f53865c6860a950392691814766957ee89316 # Parent 7d63398fbfd1b3f8693e46035c9058b6cfb824aa revset: use localrepo revbranchcache for branch name filtering Branch name filtering in revsets was expensive. For every rev it created a changectx and called .branch() which retrieved the branch name from the changelog. Instead, use the revbranchcache. The revbranchcache is used read-only. The revset implementation with generators and callbacks makes it hard to figure out when we are done using/updating the cache and could write it back. It would also be 'tricky' to lock the repo for writing from within a revset execution. Finally, the branchmap update will usually make sure that the cache is updated before any revset can be run. The revbranchcache is used without any locking but is short-lived and used in a tight loop where we can assume that the changelog doesn't change ... or where it not is relevant to us if it does. perfrevset 'branch(mobile)' on mozilla-central. Before: ! wall 10.989637 comb 10.970000 user 10.940000 sys 0.030000 (best of 3) After, no cache: ! wall 7.368656 comb 7.370000 user 7.360000 sys 0.010000 (best of 3) After, with cache: ! wall 0.528098 comb 0.530000 user 0.530000 sys 0.000000 (best of 18) The performance improvement even without cache come from being based on branchinfo on the changelog instead of using ctx.branch(). Some tests are added to verify that the revbranchcache works and keep an eye on when the cache files actually are updated. diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -457,6 +457,11 @@ def branch(repo, subset, x): a regular expression. To match a branch that actually starts with `re:`, use the prefix `literal:`. """ + import branchmap + urepo = repo.unfiltered() + ucl = urepo.changelog + getbi = branchmap.revbranchcache(urepo).branchinfo + try: b = getstring(x, '') except error.ParseError: @@ -468,16 +473,16 @@ def branch(repo, subset, x): # note: falls through to the revspec case if no branch with # this name exists if pattern in repo.branchmap(): - return subset.filter(lambda r: matcher(repo[r].branch())) + return subset.filter(lambda r: matcher(getbi(ucl, r)[0])) else: - return subset.filter(lambda r: matcher(repo[r].branch())) + return subset.filter(lambda r: matcher(getbi(ucl, r)[0])) s = getset(repo, spanset(repo), x) b = set() for r in s: - b.add(repo[r].branch()) + b.add(getbi(ucl, r)[0]) c = s.__contains__ - return subset.filter(lambda r: c(r) or repo[r].branch() in b) + return subset.filter(lambda r: c(r) or getbi(ucl, r)[0] in b) def bumped(repo, subset, x): """``bumped()`` diff --git a/tests/test-branches.t b/tests/test-branches.t --- a/tests/test-branches.t +++ b/tests/test-branches.t @@ -520,4 +520,79 @@ template output: } ] +revision branch name caching implementation + +cache creation + $ rm .hg/cache/rbc-revs-v1 + $ hg debugrevspec 'branch("re:a ")' + 7 + $ [ -f .hg/cache/rbc-revs-v1 ] || echo no file + no file +recovery from invalid cache file + $ echo > .hg/cache/rbc-revs-v1 + $ hg debugrevspec 'branch("re:a ")' + 7 +cache update NOT fully written from revset + $ "$TESTDIR/md5sum.py" .hg/cache/rbc-revs-v1 + 68b329da9893e34099c7d8ad5cb9c940 .hg/cache/rbc-revs-v1 +recovery from other corruption - extra trailing data + $ echo >> .hg/cache/rbc-revs-v1 + $ hg debugrevspec 'branch("re:a ")' + 7 +cache update NOT fully written from revset + $ "$TESTDIR/md5sum.py" .hg/cache/rbc-revs-v1 + e1c06d85ae7b8b032bef47e42e4c08f9 .hg/cache/rbc-revs-v1 +lazy update after commit + $ hg tag tag + $ "$TESTDIR/md5sum.py" .hg/cache/rbc-revs-v1 + d0c0166808ee0a1f0e8894915ad363b6 .hg/cache/rbc-revs-v1 + $ hg debugrevspec 'branch("re:a ")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/rbc-revs-v1 + d0c0166808ee0a1f0e8894915ad363b6 .hg/cache/rbc-revs-v1 +update after rollback - cache keeps stripped revs until written for other reasons + $ hg up -qr '.^' + $ hg rollback -qf + $ "$TESTDIR/md5sum.py" .hg/cache/rbc-revs-v1 + d8c2acdc229bf942fde1dfdbe8f9d933 .hg/cache/rbc-revs-v1 + $ hg debugrevspec 'branch("re:a ")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/rbc-revs-v1 + d8c2acdc229bf942fde1dfdbe8f9d933 .hg/cache/rbc-revs-v1 +handle history mutations that doesn't change the tip node - this is a problem +with the cache invalidation scheme used by branchmap + $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n' + 14:f894c25619d3 c + 13:e23b5505d1ad b + $ hg bundle -q --all bu.hg + $ hg --config extensions.strip= strip --no-b -qr -1: + $ hg up -q tip + $ hg branch + b + $ hg branch -q hacked + $ hg ci --amend -qm 'hacked' + $ hg pull -q bu.hg -r f894c25619d3 + $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n' + 14:f894c25619d3 c + 12:e3d49c0575d8 b + $ hg debugrevspec 'branch("hacked")' + 13 + $ "$TESTDIR/md5sum.py" .hg/cache/rbc-revs-v1 + 22424d7e106c894336d9d705b0241bc5 .hg/cache/rbc-revs-v1 +cleanup, restore old state + $ hg --config extensions.strip= strip --no-b -qr -2: + $ hg pull -q bu.hg + $ rm bu.hg + $ hg up -qr tip + $ hg log -r tip -T'{rev}:{node|short}\n' + 14:f894c25619d3 +the cache file do not go back to the old state - it still contains the +now unused 'hacked' branch name) + $ hg debugrevspec 'branch("re:a ")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/rbc-revs-v1 + d8c2acdc229bf942fde1dfdbe8f9d933 .hg/cache/rbc-revs-v1 + $ cat .hg/cache/rbc-names-v1 + default\x00a\x00b\x00c\x00a branch name much longer than the default justification used by branches\x00hacked (no-eol) (esc) + $ cd ..