# HG changeset patch # User Gregory Szorc # Date 2014-07-24 19:12:12 # Node ID f486001f9d6fc7dd161016eedfa68a42b7a2840a # Parent c8411fb5dfef1284d6a757d1eb610623104287d0 revset: optimize baseset.__sub__ (issue4313) dd716807fd23 regressed performance of baseset.__sub__ by introducing a lazyset. This patch restores that lost performance by eagerly evaluating baseset.__sub__ if the other set is a baseset. revsetbenchmark.py results impacted by this change: revset #6: roots(0::tip) 0) wall 2.923473 comb 2.920000 user 2.920000 sys 0.000000 (best of 4) 1) wall 0.077614 comb 0.080000 user 0.080000 sys 0.000000 (best of 100) revset #23: roots((0:tip)::) 0) wall 2.875178 comb 2.880000 user 2.880000 sys 0.000000 (best of 4) 1) wall 0.154519 comb 0.150000 user 0.150000 sys 0.000000 (best of 61) On the author's machine, this slowdown manifested during evaluation of 'roots(%ln::)' in phases.retractboundary after unbundling the Firefox repository. Using `time hg unbundle firefox.hg` as a benchmark: Before: 8:00 After: 4:28 Delta: -3:32 For reference, the subset and cs baseset instances impacted by this change were of lengths 193634 and 193627, respectively. Explicit test coverage of roots(%ln::), while similar to the existing roots(0::tip) benchmark, has been added. diff --git a/contrib/revsetbenchmarks.txt b/contrib/revsetbenchmarks.txt --- a/contrib/revsetbenchmarks.txt +++ b/contrib/revsetbenchmarks.txt @@ -21,3 +21,4 @@ public() draft() :10000 and draft() max(::(tip~20) - obsolete()) +roots((0:tip)::) diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -2232,6 +2232,13 @@ class baseset(list): """Returns a new object with the substraction of the two collections. This is part of the mandatory API for smartset.""" + # If we are operating on 2 baseset, do the computation now since all + # data is available. The alternative is to involve a lazyset, which + # may be slow. + if isinstance(other, baseset): + other = other.set() + return baseset([x for x in self if x not in other]) + return self.filter(lambda x: x not in other) def __and__(self, other):