upstream/mercurial-mirror Commit - r39572:bdb17792

ancestor: optimize _lazyancestorsiter() for contiguous chains...

Yuya Nishihara -

r39572:bdb17792 default

parent child

mercurial/ancestor.py

0 +10 -2

             # ancestor.py - generic DAG ancestor algorithm for mercurial
             #
             # Copyright 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import heapq
             from .node import nullrev
             from . import (
                 pycompat,
             )
             def commonancestorsheads(pfunc, *nodes):
                 """Return the set of heads of the revisions that are ancestors of
                 every given node, i.e. heads(::nodes[0] and ::nodes[1] and ...).

                 pfunc is called with a vertex and must return the list of its parent
                 vertices.
                 """
                 # *nodes always arrives as a tuple; collapse duplicates into a set.
                 nodes = set(nodes)
                 if nullrev in nodes:
                     return set()
                 if len(nodes) <= 1:
                     return nodes

                 # Every input node owns one bit. The mask stored for a revision tells
                 # which of the input nodes have been found to descend from it.
                 seen = [0] * (max(nodes) + 1)
                 nbits = 0
                 for node in nodes:
                     seen[node] = 1 << nbits
                     nbits += 1
                 allseen = (1 << nbits) - 1
                 # First bit above the per-node bits; set on a common head and then
                 # pushed down to everything below it.
                 poison = 1 << nbits

                 heads = set()
                 # number of non-poisoned revisions still waiting to be visited
                 pending = len(nodes)
                 rev = len(seen)
                 while rev > 0 and pending:
                     rev -= 1
                     mask = seen[rev]
                     if not mask:
                         continue
                     if mask < poison:
                         pending -= 1
                         if mask == allseen:
                             heads.add(rev)
                             if rev in nodes:
                                 # history is linear
                                 return {rev}
                             mask |= poison
                     if mask >= poison:
                         # rev is (below) a common head: overwrite the parents with the
                         # poisoned mask so they are not reported as heads themselves
                         for parent in pfunc(rev):
                             if parent == nullrev:
                                 continue
                             pmask = seen[parent]
                             if pmask and pmask < poison:
                                 pending -= 1
                             seen[parent] = mask
                     else:
                         # ordinary revision: hand its mask over to the parents
                         for parent in pfunc(rev):
                             if parent == nullrev:
                                 continue
                             pmask = seen[parent]
                             if pmask == 0:
                                 seen[parent] = mask
                                 pending += 1
                             elif pmask != mask:
                                 seen[parent] = pmask | mask
                 return heads
             def ancestors(pfunc, *orignodes):
                 """
                 Returns the common ancestors of the given nodes that are furthest from
                 a root (as measured by longest path).
                 pfunc must return a list of parent vertices for a given vertex.

                 The heads of the common ancestors are computed first with
                 commonancestorsheads(). When there is at most one head it is returned
                 as is; otherwise deepest() picks among the heads. The result is a set,
                 except that deepest() returns an empty list when its walk does not end
                 with exactly one mask left.
                 """
                 def deepest(nodes):
                     # interesting: bitmask -> number of tracked vertices that
                     # currently carry that mask
                     interesting = {}
                     count = max(nodes) + 1
                     # depth[v] == 0 means v has not been reached; the input nodes
                     # start at depth 1
                     depth = [0] * count
                     # seen[v]: bitmask of the input nodes v has been reached from
                     seen = [0] * count
                     # (bit, node) pairs, used to translate the final mask back
                     mapping = []
                     for (i, n) in enumerate(sorted(nodes)):
                         depth[n] = 1
                         b = 1 << i
                         seen[n] = b
                         interesting[b] = 1
                         mapping.append((b, n))
                     # walk from the highest vertex down for as long as more than one
                     # distinct mask is still being tracked
                     nv = count - 1
                     while nv >= 0 and len(interesting) > 1:
                         v = nv
                         nv -= 1
                         dv = depth[v]
                         if dv == 0:
                             continue
                         sv = seen[v]
                         for p in pfunc(v):
                             if p == nullrev:
                                 continue
                             dp = depth[p]
                             nsp = sp = seen[p]
                             if dp <= dv:
                                 # p is not yet recorded as deeper than v: place it one
                                 # level below v and let it take over v's mask
                                 depth[p] = dv + 1
                                 if sp != sv:
                                     interesting[sv] += 1
                                     nsp = seen[p] = sv
                                     if sp:
                                         interesting[sp] -= 1
                                         if interesting[sp] == 0:
                                             del interesting[sp]
                             elif dv == dp - 1:
                                 # p already sits exactly one level below v (reached
                                 # through another path): merge v's mask into p's
                                 nsp = sp | sv
                                 if nsp == sp:
                                     continue
                                 seen[p] = nsp
                                 interesting.setdefault(nsp, 0)
                                 interesting[nsp] += 1
                                 interesting[sp] -= 1
                                 if interesting[sp] == 0:
                                     del interesting[sp]
                         # v has been handled: drop its own contribution to its mask
                         interesting[sv] -= 1
                         if interesting[sv] == 0:
                             del interesting[sv]
                     if len(interesting) != 1:
                         # NOTE: a list here, whereas every other exit yields a set
                         return []
                     # translate the surviving mask back into the input nodes whose
                     # bit it contains
                     k = 0
                     for i in interesting:
                         k |= i
                     return set(n for (i, n) in mapping if k & i)
                 gca = commonancestorsheads(pfunc, *orignodes)
                 if len(gca) <= 1:
                     return gca
                 return deepest(gca)
             class incrementalmissingancestors(object):
                 '''state carried between incremental missing-ancestor computations

                 This is close in spirit to lazyancestors below, but it is deliberately a
                 separate class: supporting both contains and missingancestors
                 operations on the same internal data structures would add needless
                 complexity.'''

                 def __init__(self, pfunc, bases):
                     self.pfunc = pfunc
                     # an empty collection of bases is represented by nullrev alone
                     self.bases = set(bases) or {nullrev}

                 def hasbases(self):
                     '''whether the common set has any non-trivial bases'''
                     known = self.bases
                     return known and known != {nullrev}

                 def addbases(self, newbases):
                     '''grow the ancestor set by adding new bases'''
                     self.bases.update(newbases)

                 def removeancestorsfrom(self, revs):
                     '''remove all ancestors of bases from the set revs (in place)

                     self.bases grows with the parents visited along the way.'''
                     known = self.bases
                     parentsof = self.pfunc
                     revs.difference_update(known)
                     # nullrev is always an ancestor
                     revs.discard(nullrev)
                     if not revs:
                         return
                     # nothing above the highest base can be an ancestor of a base, so
                     # those revs are kept no matter what; only revs <= top need a look
                     top = max(known)
                     untouchable = len([r for r in revs if r > top])
                     if untouchable == len(revs):
                         # no revs to consider
                         return
                     for rev in pycompat.xrange(top, min(revs) - 1, -1):
                         if rev in known:
                             revs.discard(rev)
                             known.update(parentsof(rev))
                             if untouchable == len(revs):
                                 # no more potential revs to discard
                                 break

                 def missingancestors(self, revs):
                     '''return all the ancestors of revs that are not ancestors of self.bases

                     This may include elements from revs.

                     Equivalent to the revset (::revs - ::self.bases). Revs are returned in
                     revision number order, which is a topological order. self.bases is
                     extended with the common ancestors discovered during the walk.'''
                     parentsof = self.pfunc
                     basesvisit = self.bases
                     revsvisit = set(revs)
                     bothvisit = revsvisit & basesvisit
                     revsvisit -= bothvisit
                     if not revsvisit:
                         return []
                     start = max(max(revsvisit), max(basesvisit))

                     # Invariants held from here on:
                     # - revsvisit: nodes known to be an ancestor of at least one rev
                     # - basesvisit: nodes known to be an ancestor of at least one base
                     # - bothvisit: nodes known to be an ancestor of at least one rev
                     #   and of at least one base. It never intersects revsvisit and is
                     #   always a subset of basesvisit.
                     def markshared(node):
                         # node is an ancestor of both sides
                         revsvisit.discard(node)
                         basesvisit.add(node)
                         bothvisit.add(node)

                     # Walk down in reverse topological order, feeding the parents of
                     # visited nodes into the sets. Once revsvisit runs empty, every
                     # remaining ancestor of revs is also an ancestor of bases.
                     found = []
                     for curr in pycompat.xrange(start, nullrev, -1):
                         if not revsvisit:
                             break
                         if curr in bothvisit:
                             bothvisit.remove(curr)
                             # curr's parents might have made it into revsvisit through
                             # another path
                             for p in parentsof(curr):
                                 markshared(p)
                             continue
                         if curr in revsvisit:
                             found.append(curr)
                             revsvisit.remove(curr)
                             mine, theirs = revsvisit, basesvisit
                         elif curr in basesvisit:
                             mine, theirs = basesvisit, revsvisit
                         else:
                             # not an ancestor of revs or bases: ignore
                             continue
                         for p in parentsof(curr):
                             if p == nullrev:
                                 continue
                             if p in theirs or p in bothvisit:
                                 # p is implicitly on our side too, so it belongs in
                                 # bothvisit
                                 markshared(p)
                             else:
                                 # visit later
                                 mine.add(p)
                     found.reverse()
                     return found
             # Extracted from lazyancestors.__iter__ to avoid a reference cycle
             def _lazyancestorsiter(parentrevs, initrevs, stoprev, inclusive):
                 """Generate the ancestors of initrevs, highest revision first.

                 parentrevs is called with a revision and must return a (p1, p2) pair.
                 When inclusive is true the initrevs themselves are scheduled,
                 otherwise only their parents are. Iteration ends at the first
                 scheduled revision that is lower than stoprev.

                 Pending revisions are stored negated in a heapq heap, so the smallest
                 heap entry is the highest revision. nullrev is pre-marked as seen and
                 is therefore never scheduled as a parent.
                 """
                 seen = {nullrev}
                 # local aliases for the heap operations used in the loops below
                 schedule = heapq.heappush
                 nextitem = heapq.heappop
                 see = seen.add
                 if inclusive:
                     visit = [-r for r in initrevs]
                     seen.update(initrevs)
                     heapq.heapify(visit)
                 else:
                     visit = []
                     heapq.heapify(visit)
                     for r in initrevs:
                         p1, p2 = parentrevs(r)
                         if p1 not in seen:
                             schedule(visit, -p1)
                             see(p1)
                         if p2 not in seen:
                             schedule(visit, -p2)
                             see(p2)
                 while visit:
-                    current = -nextitem(visit)
+                    current = -visit[0]
                     if current < stoprev:
                         break
                     yield current
+                    # optimize out heapq operation if p1 is known to be the next highest
+                    # revision, which is quite common in linear history.
                     p1, p2 = parentrevs(current)
                     if p1 not in seen:
-                        schedule(visit, -p1)
+                        if current - p1 == 1:
+                            visit[0] = -p1
+                        else:
+                            nextitem(visit)
+                            schedule(visit, -p1)
                         see(p1)
+                    else:
+                        nextitem(visit)
                     if p2 not in seen:
                         schedule(visit, -p2)
                         see(p2)
             class lazyancestors(object):
                 def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
                     """Create a lazily evaluated collection of the ancestors of revs.

                     pfunc is the function used to look up the parents of a revision
                     and revs is an iterable of starting revisions; starting revisions
                     lower than stoprev are dropped right away. inclusive is a boolean
                     that indicates whether revs themselves should be included.

                     The ancestors are computed lazily starting from revs. The object
                     supports iteration and membership. Revs lower than stoprev will not
                     be generated, and the result does not include the null revision."""
                     self._parentrevs = pfunc
                     self._initrevs = [rev for rev in revs if rev >= stoprev]
                     self._stoprev = stoprev
                     self._inclusive = inclusive
                     # revisions the membership iterator has produced so far
                     self._containsseen = set()
                     # iterator dedicated to __contains__; None once it ran dry
                     self._containsiter = _lazyancestorsiter(
                         self._parentrevs, self._initrevs, self._stoprev,
                         self._inclusive)

                 def __nonzero__(self):
                     """False if the set is empty, True otherwise."""
                     for _rev in self:
                         return True
                     return False

                 __bool__ = __nonzero__

                 def __iter__(self):
                     """Generate the ancestors of _initrevs in reverse topological order.

                     Without inclusive, the sequence starts with the parents of each
                     revision in revs, i.e. a revision is *not* considered an ancestor
                     of itself. With inclusive, the source revisions are yielded too.

                     In both cases results come out in reverse revision number order.
                     That order is also topological: a child is always emitted before
                     its parent."""
                     return _lazyancestorsiter(self._parentrevs, self._initrevs,
                                               self._stoprev, self._inclusive)

                 def __contains__(self, target):
                     """Test whether target is an ancestor of self._initrevs."""
                     known = self._containsseen
                     if target in known:
                         return True
                     pending = self._containsiter
                     if pending is None:
                         # Iterator exhausted
                         return False
                     # Only integer target is valid, but some callers expect 'None in self'
                     # to be False. So we explicitly allow it.
                     if target is None:
                         return False
                     for rev in pending:
                         known.add(rev)
                         if rev == target:
                             return True
                         if rev < target:
                             # revisions arrive in descending order: target was skipped
                             return False
                     # Set to None to indicate fast-path can be used next time, and to
                     # free up memory.
                     self._containsiter = None
                     return False

tests/test-ancestor.py

0 +4 0

             from __future__ import absolute_import, print_function
             import binascii
             import getopt
             import math
             import os
             import random
             import sys
             import time
             from mercurial.node import nullrev
             from mercurial import (
                 ancestor,
                 debugcommands,
                 hg,
                 pycompat,
                 ui as uimod,
                 util,
             )
             # Python 3 has neither 'long' nor 'xrange'; alias them to their Python 3
             # counterparts so the rest of this script can keep using those names.
             if pycompat.ispy3:
                 long = int
                 xrange = range
             def buildgraph(rng, nodes=100, rootprob=0.05, mergeprob=0.2, prevprob=0.7):
                 '''build a random graph, one parent list per node

                 rng: random source; random(), randrange() and choice() are used
                 nodes: total number of nodes in the graph
                 rootprob: probability that a new node (not 0) will be a root
                 mergeprob: probability that, excluding a root a node will be a merge
                 prevprob: probability that p1 will be the previous node

                 return value is a graph represented as an adjacency list: entry i
                 holds the parents of node i, and a root is stored as [nullrev].
                 '''
                 graph = []
                 for rev in xrange(nodes):
                     if rev == 0 or rng.random() < rootprob:
                         parents = [nullrev]
                     elif rev == 1:
                         parents = [0]
                     elif rng.random() < mergeprob:
                         if rev == 2 or rng.random() < prevprob:
                             # p1 is prev
                             p1 = rev - 1
                         else:
                             p1 = rng.randrange(rev - 1)
                         # p2 is any earlier node other than p1
                         others = [c for c in range(rev) if c != p1]
                         parents = [p1, rng.choice(others)]
                     elif rng.random() < prevprob:
                         parents = [rev - 1]
                     else:
                         parents = [rng.randrange(rev - 1)]
                     graph.append(parents)
                 return graph
             def buildancestorsets(graph):
                 '''return a list whose entry i is the set made of node i and all of
                 its ancestors

                 graph is an adjacency list as produced by buildgraph(). The sets of a
                 node's parents must already be built when the node is processed, so
                 parents are expected to have smaller indices than their children.
                 '''
                 total = len(graph)
                 ancs = [None] * total
                 for rev in xrange(total):
                     closure = ancs[rev] = {rev}
                     parents = graph[rev]
                     if parents != [nullrev]:
                         for p in parents:
                             closure.update(ancs[p])
                 return ancs
             class naiveincrementalmissingancestors(object):
                 '''slow reference counterpart of ancestor.incrementalmissingancestors

                 ancs is a list of precomputed ancestor sets indexed by revision (see
                 buildancestorsets()); every answer is derived from it with plain set
                 arithmetic.
                 '''

                 def __init__(self, ancs, bases):
                     self.ancs = ancs
                     self.bases = set(bases)

                 def addbases(self, newbases):
                     self.bases.update(newbases)

                 def removeancestorsfrom(self, revs):
                     '''strip the ancestors of every base from the set revs, in place'''
                     for base in self.bases:
                         if base == nullrev:
                             continue
                         revs.difference_update(self.ancs[base])
                     revs.discard(nullrev)

                 def missingancestors(self, revs):
                     '''sorted ancestors of revs that are not ancestors of any base'''
                     reached = set()
                     reached.update(*(self.ancs[r] for r in revs if r != nullrev))
                     reached.difference_update(
                         *(self.ancs[b] for b in self.bases if b != nullrev))
                     return sorted(reached)
             def test_missingancestors(seed, rng):
                 """Compare ancestor.incrementalmissingancestors with the naive version.

                 Random graphs are generated with rng. For each of them, random
                 sequences of addbases / removeancestorsfrom / missingancestors calls
                 are applied to both implementations and any disagreement is reported
                 on stderr.

                 seed is only used for reporting, so that a failing run can be
                 reproduced; rng is the random.Random instance driving the test.
                 """
                 # empirically observed to take around 1 second
                 graphcount = 100
                 testcount = 10
                 inccount = 10
                 # total error count, boxed in a list so err() can update it
                 nerrs = [0]
                 # the default mu and sigma give us a nice distribution of mostly
                 # single-digit counts (including 0) with some higher ones
                 def lognormrandom(mu, sigma):
                     return int(math.floor(rng.lognormvariate(mu, sigma)))
                 def samplerevs(nodes, mu=1.1, sigma=0.8):
                     count = min(lognormrandom(mu, sigma), len(nodes))
                     return rng.sample(nodes, count)
                 def err(seed, graph, bases, seq, output, expected):
                     if nerrs[0] == 0:
                         # hex() of a Python 2 long ends with 'L'. Strip only that
                         # suffix: slicing off the last character would drop a real
                         # digit on Python 3 and make the seed useless.
                         print('seed:', hex(seed).rstrip('L'), file=sys.stderr)
                     # the graph is printed once, on its first error
                     if gerrs[0] == 0:
                         print('graph:', graph, file=sys.stderr)
                     print('* bases:', bases, file=sys.stderr)
                     print('* seq: ', seq, file=sys.stderr)
                     print('*  output:  ', output, file=sys.stderr)
                     print('*  expected:', expected, file=sys.stderr)
                     nerrs[0] += 1
                     gerrs[0] += 1
                 for g in xrange(graphcount):
                     graph = buildgraph(rng)
                     ancs = buildancestorsets(graph)
                     # per-graph error count, read and updated by err()
                     gerrs = [0]
                     for _ in xrange(testcount):
                         # start from nullrev to include it as a possibility
                         graphnodes = range(nullrev, len(graph))
                         bases = samplerevs(graphnodes)
                         # fast algorithm
                         inc = ancestor.incrementalmissingancestors(graph.__getitem__, bases)
                         # reference slow algorithm
                         naiveinc = naiveincrementalmissingancestors(ancs, bases)
                         # log of the operations applied so far, for error reports
                         seq = []
                         revs = []
                         for _ in xrange(inccount):
                             if rng.random() < 0.2:
                                 newbases = samplerevs(graphnodes)
                                 seq.append(('addbases', newbases))
                                 inc.addbases(newbases)
                                 naiveinc.addbases(newbases)
                             if rng.random() < 0.4:
                                 # larger set so that there are more revs to remove from
                                 revs = samplerevs(graphnodes, mu=1.5)
                                 seq.append(('removeancestorsfrom', revs))
                                 hrevs = set(revs)
                                 rrevs = set(revs)
                                 inc.removeancestorsfrom(hrevs)
                                 naiveinc.removeancestorsfrom(rrevs)
                                 if hrevs != rrevs:
                                     err(seed, graph, bases, seq, sorted(hrevs),
                                         sorted(rrevs))
                             else:
                                 revs = samplerevs(graphnodes)
                                 seq.append(('missingancestors', revs))
                                 h = inc.missingancestors(revs)
                                 r = naiveinc.missingancestors(revs)
                                 if h != r:
                                     err(seed, graph, bases, seq, h, r)
             # graph is a dict of child->parent adjacency lists for this graph:
             # o  13
             # |
             # | o  12
             # | |
             # | | o    11
             # | | |\
             # | | | | o  10
             # | | | | |
             # | o---+ |  9
             # | | | | |
             # o | | | |  8
             #  / / / /
             # | | o |  7
             # | | | |
             # o---+ |  6
             #  / / /
             # | | o  5
             # | |/
             # | o  4
             # | |
             # o |  3
             # | |
             # | o  2
             # |/
             # o  1
             # |
             # o  0
             # every node maps to its [p1, p2] pair; -1 stands for a missing parent
             graph = {0: [-1, -1], 1: [0, -1], 2: [1, -1], 3: [1, -1], 4: [2, -1],
                      5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7],
                      10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]}
             def genlazyancestors(revs, stoprev=0, inclusive=False):
                 """Print a header describing the request, then return the matching
                 ancestor.lazyancestors object built over the module-level graph."""
                 header = ("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" %
                           (revs, stoprev, inclusive))
                 print(header)
                 lazyset = ancestor.lazyancestors(graph.get, revs, stoprev=stoprev,
                                                  inclusive=inclusive)
                 return lazyset
             def printlazyancestors(s, l):
                 """Print which elements of l are members of s, then the full content
                 of s in iteration order."""
                 members = [n for n in l if n in s]
                 print('membership: %r' % (members,))
                 iterated = list(s)
                 print('iteration:  %r' % (iterated,))
             def test_lazyancestors():
                 """Print membership and iteration results of lazyancestors objects
                 for a range of revs / stoprev / inclusive combinations over the
                 module-level graph. The printed lines are what
                 tests/test-ancestor.py.out records as expected output."""
                 # Empty revs
                 s = genlazyancestors([])
                 printlazyancestors(s, [3, 0, -1])
                 # Standard example
                 s = genlazyancestors([11, 13])
                 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
                 # Standard with ancestry in the initial set (1 is ancestor of 3)
                 s = genlazyancestors([1, 3])
                 printlazyancestors(s, [1, -1, 0])
                 # Including revs
                 s = genlazyancestors([11, 13], inclusive=True)
                 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
                 # Test with stoprev
                 s = genlazyancestors([11, 13], stoprev=6)
                 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
                 s = genlazyancestors([11, 13], stoprev=6, inclusive=True)
                 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
                 # Test with stoprev >= min(initrevs)
                 s = genlazyancestors([11, 13], stoprev=11, inclusive=True)
                 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
                 s = genlazyancestors([11, 13], stoprev=12, inclusive=True)
                 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+                # Contiguous chains: 5->4, 2->1 (where 1 is in seen set), 1->0
+                s = genlazyancestors([10, 1], inclusive=True)
+                printlazyancestors(s, [2, 10, 4, 5, -1, 0, 1])
             # The C gca algorithm requires a real repo. These are textual descriptions of
             # DAGs that have been known to be problematic, and, optionally, known pairs
             # of revisions and their expected ancestor list.
             #
             # Each entry is a (dag, tests) pair: dag is the text handed to
             # debugbuilddag, and tests maps an (a, b) pair of revisions to the sorted
             # list of gcas expected for it (an empty dict means no known expectations).
             dagtests = [
                 (b'+2*2*2/*3/2', {}),
                 (b'+3*3/*2*2/*4*4/*4/2*4/2*2', {}),
                 (b'+2*2*/2*4*/4*/3*2/4', {(6, 7): [3, 5]}),
             ]
             def test_gca():
                 """Check the C and Python gca implementations against each other.

                 One repository is created per entry of dagtests. For every pair of
                 revisions the two implementations must return the same set of gcas,
                 which must also match the recorded expectation when there is one.
                 Disagreements are printed; nothing is printed on success.
                 """
                 u = uimod.ui.load()
                 for idx, (dag, tests) in enumerate(dagtests):
                     repo = hg.repository(u, b'gca%d' % idx, create=1)
                     cl = repo.changelog
                     if not util.safehasattr(cl.index, 'ancestors'):
                         # C version not available
                         return
                     debugcommands.debugbuilddag(u, repo, dag)
                     # Only the sets of gcas are compared; choosing a winner when
                     # more than one gca exists is not covered here.
                     for a in cl:
                         for b in cl:
                             cgcas = sorted(cl.index.ancestors(a, b))
                             pygcas = sorted(ancestor.ancestors(cl.parentrevs, a, b))
                             # None when no expectation is recorded for this pair
                             expected = tests.get((a, b))
                             agree = (cgcas == pygcas and
                                      (not expected or cgcas == expected))
                             if agree:
                                 continue
                             print("test_gca: for dag %s, gcas for %d, %d:"
                                   % (dag, a, b))
                             print("  C returned:      %s" % cgcas)
                             print("  Python returned: %s" % pygcas)
                             if expected:
                                 print("  expected:        %s" % expected)
             def main():
                 """Run all tests; the random seed may be given with -s/--seed."""
                 opts, _args = getopt.getopt(sys.argv[1:], 's:', ['seed='])
                 seed = None
                 for flag, value in opts:
                     if flag in ('-s', '--seed'):
                         # accepts base 10 or 16 strings
                         seed = long(value, base=0)
                 if seed is None:
                     # no seed requested: draw one from the OS, or fall back to the
                     # clock when os.urandom is missing
                     try:
                         seed = long(binascii.hexlify(os.urandom(16)), 16)
                     except AttributeError:
                         seed = long(time.time() * 1000)
                 rng = random.Random(seed)
                 test_missingancestors(seed, rng)
                 test_lazyancestors()
                 test_gca()
             # run the tests only when this file is executed as a script
             if __name__ == '__main__':
                 main()

tests/test-ancestor.py.out

0 +3 0

             % lazy ancestor set for [], stoprev = 0, inclusive = False
             membership: []
             iteration:  []
             % lazy ancestor set for [11, 13], stoprev = 0, inclusive = False
             membership: [7, 8, 3, 4, 1, 0]
             iteration:  [8, 7, 4, 3, 2, 1, 0]
             % lazy ancestor set for [1, 3], stoprev = 0, inclusive = False
             membership: [1, 0]
             iteration:  [1, 0]
             % lazy ancestor set for [11, 13], stoprev = 0, inclusive = True
             membership: [11, 13, 7, 8, 3, 4, 1, 0]
             iteration:  [13, 11, 8, 7, 4, 3, 2, 1, 0]
             % lazy ancestor set for [11, 13], stoprev = 6, inclusive = False
             membership: [7, 8]
             iteration:  [8, 7]
             % lazy ancestor set for [11, 13], stoprev = 6, inclusive = True
             membership: [11, 13, 7, 8]
             iteration:  [13, 11, 8, 7]
             % lazy ancestor set for [11, 13], stoprev = 11, inclusive = True
             membership: [11, 13]
             iteration:  [13, 11]
             % lazy ancestor set for [11, 13], stoprev = 12, inclusive = True
             membership: [13]
             iteration:  [13]
+            % lazy ancestor set for [10, 1], stoprev = 0, inclusive = True
+            membership: [2, 10, 4, 5, 0, 1]
+            iteration:  [10, 5, 4, 2, 1, 0]

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages