# HG changeset patch # User Siddharth Agarwal # Date 2012-12-18 20:47:20 # Node ID f7f8159caad38b4cf7237de34222ab09f2eb18f5 # Parent 9abc55ef85b58ce784ccb009aed7dfc395914239 ancestor: add lazy membership testing to lazyancestors This also makes the perfancestorset command use lazy membership testing. In a linear repository with over 400,000 commits, without this patch, hg perfancestorset takes 0.80 seconds no matter how far behind we're looking. With this patch, hg perfancestorset -- X takes: Rev X Time -1 0.00s -4000 0.01s -20000 0.04s -80000 0.17s -200000 0.43s -300000 0.69s 0 0.88s Thus, for revisions close to tip, we're up to several orders of magnitude faster. At 0 we're around 10% slower. diff --git a/contrib/perf.py b/contrib/perf.py --- a/contrib/perf.py +++ b/contrib/perf.py @@ -82,7 +82,7 @@ def perfancestorset(ui, repo, revset): revs = repo.revs(revset) heads = repo.changelog.headrevs() def d(): - s = set(repo.changelog.ancestors(heads)) + s = repo.changelog.ancestors(heads) for rev in revs: rev in s timer(d) diff --git a/mercurial/ancestor.py b/mercurial/ancestor.py --- a/mercurial/ancestor.py +++ b/mercurial/ancestor.py @@ -177,8 +177,8 @@ class lazyancestors(object): """Create a new object generating ancestors for the given revs. Does not generate revs lower than stoprev. - This is computed lazily starting from revs. The object only supports - iteration. + This is computed lazily starting from revs. The object supports + iteration and membership. cl should be a changelog and revs should be an iterable. inclusive is a boolean that indicates whether revs should be included. Revs lower @@ -190,6 +190,19 @@ class lazyancestors(object): self._stoprev = stoprev self._inclusive = inclusive + # Initialize data structures for __contains__. + # For __contains__, we use a heap rather than a deque because + # (a) it minimizes the number of parentrevs calls made + # (b) it makes the loop termination condition obvious + # Python's heap is a min-heap. Multiply all values by -1 to convert it + # into a max-heap. + self._containsvisit = [-rev for rev in revs] + heapq.heapify(self._containsvisit) + if inclusive: + self._containsseen = set(revs) + else: + self._containsseen = set() + def __iter__(self): """Generate the ancestors of _initrevs in reverse topological order. @@ -219,3 +232,33 @@ class lazyancestors(object): visit.append(parent) seen.add(parent) yield parent + + def __contains__(self, target): + """Test whether target is an ancestor of self._initrevs.""" + # Trying to do both __iter__ and __contains__ using the same visit + # heap and seen set is complex enough that it slows down both. Keep + # them separate. + seen = self._containsseen + if target in seen: + return True + + parentrevs = self._parentrevs + visit = self._containsvisit + stoprev = self._stoprev + heappop = heapq.heappop + heappush = heapq.heappush + + targetseen = False + + while visit and -visit[0] > target and not targetseen: + for parent in parentrevs(-heappop(visit)): + if parent < stoprev or parent in seen: + continue + # We need to make sure we push all parents into the heap so + # that we leave it in a consistent state for future calls. + heappush(visit, -parent) + seen.add(parent) + if parent == target: + targetseen = True + + return targetseen diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py --- a/tests/test-ancestor.py +++ b/tests/test-ancestor.py @@ -34,6 +34,9 @@ graph = {0: [-1], 1: [0], 2: [1], 3: [1] 13: [8]} pfunc = graph.get +class mockchangelog(object): + parentrevs = graph.get + def runmissingancestors(revs, bases): print "%% ancestors of %s and not of %s" % (revs, bases) print ancestor.missingancestors(revs, bases, pfunc) @@ -70,5 +73,34 @@ def test_missingancestors(): runmissingancestors([10, 11, 12], [13]) runmissingancestors([13], [10, 11, 12]) +def genlazyancestors(revs, stoprev=0, inclusive=False): + print ("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" % + (revs, stoprev, inclusive)) + return ancestor.lazyancestors(mockchangelog, revs, stoprev=stoprev, + inclusive=inclusive) + +def printlazyancestors(s, l): + print [n for n in l if n in s] + +def test_lazyancestors(): + # Empty revs + s = genlazyancestors([]) + printlazyancestors(s, [3, 0, -1]) + + # Standard example + s = genlazyancestors([11, 13]) + printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0]) + + # Including revs + s = genlazyancestors([11, 13], inclusive=True) + printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0]) + + # Test with stoprev + s = genlazyancestors([11, 13], stoprev=6) + printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0]) + s = genlazyancestors([11, 13], stoprev=6, inclusive=True) + printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0]) + if __name__ == '__main__': test_missingancestors() + test_lazyancestors() diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out --- a/tests/test-ancestor.py.out +++ b/tests/test-ancestor.py.out @@ -34,3 +34,13 @@ [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12] % ancestors of [13] and not of [10, 11, 12] [8, 13] +% lazy ancestor set for [], stoprev = 0, inclusive = False +[] +% lazy ancestor set for [11, 13], stoprev = 0, inclusive = False +[7, 8, 3, 4, 1, 0] +% lazy ancestor set for [11, 13], stoprev = 0, inclusive = True +[11, 13, 7, 8, 3, 4, 1, 0] +% lazy ancestor set for [11, 13], stoprev = 6, inclusive = False +[7, 8] +% lazy ancestor set for [11, 13], stoprev = 6, inclusive = True +[11, 13, 7, 8]