##// END OF EJS Templates
lazyancestors: extract __iter__ to free function...
lazyancestors: extract __iter__ to free function The next patch will keep a reference to the returned iterator in a field, which would otherwise result in a reference cycle. Differential Revision: https://phab.mercurial-scm.org/D4517

File last commit:

r39517:b6a0e06b default
r39517:b6a0e06b default
Show More
ancestor.py
382 lines | 12.8 KiB | text/x-python | PythonLexer
Matt Mackall
Abstract ancestor algorithm into generic function...
r3135 # ancestor.py - generic DAG ancestor algorithm for mercurial
#
# Copyright 2006 Matt Mackall <mpm@selenic.com>
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Abstract ancestor algorithm into generic function...
r3135
Gregory Szorc
ancestor: use absolute_import...
r25915 from __future__ import absolute_import
Augie Fackler
cleanup: move stdlib imports to their own import statement...
r20034 import heapq
Gregory Szorc
ancestor: use absolute_import...
r25915
from .node import nullrev
Gregory Szorc
global: use pycompat.xrange()...
r38806 from . import (
pycompat,
)
Matt Mackall
Abstract ancestor algorithm into generic function...
r3135
Mads Kiilerich
ancestors: extract candidates function as commonancestorsheads
r21101 def commonancestorsheads(pfunc, *nodes):
"""Returns a set with the heads of all common ancestors of all nodes,
heads(::nodes[0] and ::nodes[1] and ...) .
pfunc must return a list of parent vertices for a given vertex.
"""
if not isinstance(nodes, set):
nodes = set(nodes)
if nullrev in nodes:
return set()
if len(nodes) <= 1:
return nodes
allseen = (1 << len(nodes)) - 1
seen = [0] * (max(nodes) + 1)
for i, n in enumerate(nodes):
seen[n] = 1 << i
poison = 1 << (i + 1)
gca = set()
interesting = len(nodes)
nv = len(seen) - 1
while nv >= 0 and interesting:
v = nv
nv -= 1
if not seen[v]:
continue
sv = seen[v]
if sv < poison:
interesting -= 1
if sv == allseen:
gca.add(v)
sv |= poison
if v in nodes:
# history is linear
Martin von Zweigbergk
cleanup: use set literals...
r32291 return {v}
Mads Kiilerich
ancestors: extract candidates function as commonancestorsheads
r21101 if sv < poison:
for p in pfunc(v):
sp = seen[p]
if p == nullrev:
continue
if sp == 0:
seen[p] = sv
interesting += 1
elif sp != sv:
seen[p] |= sv
else:
for p in pfunc(v):
if p == nullrev:
continue
sp = seen[p]
if sp and sp < poison:
interesting -= 1
seen[p] = sv
return gca
Bryan O'Sullivan
ancestor: a new algorithm that is faster for nodes near tip...
r18986 def ancestors(pfunc, *orignodes):
"""
Returns the common ancestors of a and b that are furthest from a
root (as measured by longest path).
pfunc must return a list of parent vertices for a given vertex.
"""
def deepest(nodes):
interesting = {}
count = max(nodes) + 1
depth = [0] * count
seen = [0] * count
mapping = []
for (i, n) in enumerate(sorted(nodes)):
depth[n] = 1
b = 1 << i
seen[n] = b
interesting[b] = 1
mapping.append((b, n))
nv = count - 1
while nv >= 0 and len(interesting) > 1:
v = nv
nv -= 1
dv = depth[v]
if dv == 0:
continue
sv = seen[v]
for p in pfunc(v):
if p == nullrev:
continue
dp = depth[p]
nsp = sp = seen[p]
if dp <= dv:
depth[p] = dv + 1
if sp != sv:
interesting[sv] += 1
nsp = seen[p] = sv
if sp:
interesting[sp] -= 1
if interesting[sp] == 0:
del interesting[sp]
elif dv == dp - 1:
nsp = sp | sv
if nsp == sp:
continue
seen[p] = nsp
interesting.setdefault(nsp, 0)
interesting[nsp] += 1
interesting[sp] -= 1
if interesting[sp] == 0:
del interesting[sp]
interesting[sv] -= 1
if interesting[sv] == 0:
del interesting[sv]
if len(interesting) != 1:
return []
k = 0
for i in interesting:
k |= i
return set(n for (i, n) in mapping if k & i)
Mads Kiilerich
ancestors: extract candidates function as commonancestorsheads
r21101 gca = commonancestorsheads(pfunc, *orignodes)
Bryan O'Sullivan
ancestor: a new algorithm that is faster for nodes near tip...
r18986
if len(gca) <= 1:
return gca
return deepest(gca)
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 class incrementalmissingancestors(object):
'''persistent state used to calculate missing ancestors incrementally
Siddharth Agarwal
ancestor: faster algorithm for difference of ancestor sets...
r17970
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 Although similar in spirit to lazyancestors below, this is a separate class
because trying to support contains and missingancestors operations with the
same internal data structures adds needless complexity.'''
def __init__(self, pfunc, bases):
self.bases = set(bases)
if not self.bases:
self.bases.add(nullrev)
self.pfunc = pfunc
Siddharth Agarwal
ancestor: faster algorithm for difference of ancestor sets...
r17970
Siddharth Agarwal
ancestor: add a way to test whether a missing ancestor object has bases...
r23340 def hasbases(self):
'''whether the common set has any non-trivial bases'''
Martin von Zweigbergk
cleanup: use set literals...
r32291 return self.bases and self.bases != {nullrev}
Siddharth Agarwal
ancestor: add a way to test whether a missing ancestor object has bases...
r23340
Siddharth Agarwal
ancestor: add a way to add to bases of a missing ancestor object...
r23341 def addbases(self, newbases):
'''grow the ancestor set by adding new bases'''
self.bases.update(newbases)
Siddharth Agarwal
ancestor: add a way to remove ancestors of bases from a given set...
r23342 def removeancestorsfrom(self, revs):
'''remove all ancestors of bases from the set revs (in place)'''
bases = self.bases
pfunc = self.pfunc
revs.difference_update(bases)
# nullrev is always an ancestor
revs.discard(nullrev)
if not revs:
return
# anything in revs > start is definitely not an ancestor of bases
# revs <= start needs to be investigated
start = max(bases)
keepcount = sum(1 for r in revs if r > start)
if len(revs) == keepcount:
# no revs to consider
return
Gregory Szorc
global: use pycompat.xrange()...
r38806 for curr in pycompat.xrange(start, min(revs) - 1, -1):
Siddharth Agarwal
ancestor: add a way to remove ancestors of bases from a given set...
r23342 if curr not in bases:
continue
revs.discard(curr)
bases.update(pfunc(curr))
if len(revs) == keepcount:
# no more potential revs to discard
break
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 def missingancestors(self, revs):
'''return all the ancestors of revs that are not ancestors of self.bases
This may include elements from revs.
Siddharth Agarwal
ancestor: faster algorithm for difference of ancestor sets...
r17970
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 Equivalent to the revset (::revs - ::self.bases). Revs are returned in
revision number order, which is a topological order.'''
revsvisit = set(revs)
basesvisit = self.bases
pfunc = self.pfunc
bothvisit = revsvisit.intersection(basesvisit)
revsvisit.difference_update(bothvisit)
Siddharth Agarwal
ancestor: faster algorithm for difference of ancestor sets...
r17970 if not revsvisit:
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 return []
Siddharth Agarwal
ancestor: faster algorithm for difference of ancestor sets...
r17970
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 start = max(max(revsvisit), max(basesvisit))
# At this point, we hold the invariants that:
# - revsvisit is the set of nodes we know are an ancestor of at least
# one of the nodes in revs
# - basesvisit is the same for bases
# - bothvisit is the set of nodes we know are ancestors of at least one
# of the nodes in revs and one of the nodes in bases. bothvisit and
# revsvisit are mutually exclusive, but bothvisit is a subset of
# basesvisit.
# Now we walk down in reverse topo order, adding parents of nodes
# already visited to the sets while maintaining the invariants. When a
# node is found in both revsvisit and basesvisit, it is removed from
# revsvisit and added to bothvisit. When revsvisit becomes empty, there
# are no more ancestors of revs that aren't also ancestors of bases, so
# exit.
missing = []
Gregory Szorc
global: use pycompat.xrange()...
r38806 for curr in pycompat.xrange(start, nullrev, -1):
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 if not revsvisit:
break
Siddharth Agarwal
ancestor: faster algorithm for difference of ancestor sets...
r17970
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 if curr in bothvisit:
bothvisit.remove(curr)
# curr's parents might have made it into revsvisit through
# another path
for p in pfunc(curr):
revsvisit.discard(p)
basesvisit.add(p)
bothvisit.add(p)
continue
Siddharth Agarwal
ancestor: faster algorithm for difference of ancestor sets...
r17970
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 if curr in revsvisit:
missing.append(curr)
revsvisit.remove(curr)
thisvisit = revsvisit
othervisit = basesvisit
elif curr in basesvisit:
thisvisit = basesvisit
othervisit = revsvisit
Siddharth Agarwal
ancestor: faster algorithm for difference of ancestor sets...
r17970 else:
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 # not an ancestor of revs or bases: ignore
continue
Siddharth Agarwal
ancestor: faster algorithm for difference of ancestor sets...
r17970
Siddharth Agarwal
ancestor.missingancestors: turn into a state-keeping class...
r23334 for p in pfunc(curr):
if p == nullrev:
pass
elif p in othervisit or p in bothvisit:
# p is implicitly in thisvisit. This means p is or should be
# in bothvisit
revsvisit.discard(p)
basesvisit.add(p)
bothvisit.add(p)
else:
# visit later
thisvisit.add(p)
missing.reverse()
return missing
Martin von Zweigbergk
lazyancestors: extract __iter__ to free function...
r39517 # Extracted from lazyancestors.__iter__ to avoid a reference cycle
def _lazyancestorsiter(parentrevs, initrevs, stoprev, inclusive):
seen = {nullrev}
revs = initrevs
schedule = heapq.heappush
nextitem = heapq.heappop
see = seen.add
if inclusive:
visit = [-r for r in revs]
seen.update(revs)
heapq.heapify(visit)
else:
visit = []
heapq.heapify(visit)
for r in revs:
for parent in parentrevs(r):
if parent not in seen:
schedule(visit, -parent)
see(parent)
while visit:
current = -nextitem(visit)
if current >= stoprev:
yield current
for parent in parentrevs(current):
if parent not in seen:
schedule(visit, -parent)
see(parent)
Siddharth Agarwal
revlog: move ancestor generation out to a new class...
r18090 class lazyancestors(object):
Siddharth Agarwal
ancestor.lazyancestors: take parentrevs function rather than changelog...
r23328 def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
Siddharth Agarwal
revlog: move ancestor generation out to a new class...
r18090 """Create a new object generating ancestors for the given revs. Does
not generate revs lower than stoprev.
Siddharth Agarwal
ancestor: add lazy membership testing to lazyancestors...
r18091 This is computed lazily starting from revs. The object supports
iteration and membership.
Siddharth Agarwal
revlog: move ancestor generation out to a new class...
r18090
cl should be a changelog and revs should be an iterable. inclusive is
a boolean that indicates whether revs should be included. Revs lower
than stoprev will not be generated.
Result does not include the null revision."""
Siddharth Agarwal
ancestor.lazyancestors: take parentrevs function rather than changelog...
r23328 self._parentrevs = pfunc
Yuya Nishihara
ancestor: filter out initial revisions lower than stoprev
r39512 self._initrevs = revs = [r for r in revs if r >= stoprev]
Siddharth Agarwal
revlog: move ancestor generation out to a new class...
r18090 self._stoprev = stoprev
self._inclusive = inclusive
Siddharth Agarwal
ancestor: add lazy membership testing to lazyancestors...
r18091 # Initialize data structures for __contains__.
# For __contains__, we use a heap rather than a deque because
# (a) it minimizes the number of parentrevs calls made
# (b) it makes the loop termination condition obvious
# Python's heap is a min-heap. Multiply all values by -1 to convert it
# into a max-heap.
self._containsvisit = [-rev for rev in revs]
heapq.heapify(self._containsvisit)
if inclusive:
self._containsseen = set(revs)
else:
self._containsseen = set()
Pierre-Yves David
ancestors: add a __nonzero__ method...
r22225 def __nonzero__(self):
"""False if the set is empty, True otherwise."""
try:
timeless
py3: convert to next() function...
r29216 next(iter(self))
Pierre-Yves David
ancestors: add a __nonzero__ method...
r22225 return True
except StopIteration:
return False
Gregory Szorc
py3: add __bool__ to every class defining __nonzero__...
r31476 __bool__ = __nonzero__
Siddharth Agarwal
revlog: move ancestor generation out to a new class...
r18090 def __iter__(self):
"""Generate the ancestors of _initrevs in reverse topological order.
If inclusive is False, yield a sequence of revision numbers starting
Boris Feld
ancestors: actually iterate over ancestors in topological order (issue5979)...
r39509 with the parents of each revision in revs, i.e., each revision is
*not* considered an ancestor of itself. Results are emitted in reverse
revision number order. That order is also topological: a child is
always emitted before its parent.
Siddharth Agarwal
revlog: move ancestor generation out to a new class...
r18090
Boris Feld
ancestors: ensure a consistent order even in the "inclusive" case...
r39510 If inclusive is True, the source revisions are also yielded. The
reverse revision number order is still enforced."""
Martin von Zweigbergk
lazyancestors: extract __iter__ to free function...
r39517 for rev in _lazyancestorsiter(self._parentrevs, self._initrevs,
self._stoprev, self._inclusive):
yield rev
Siddharth Agarwal
ancestor: add lazy membership testing to lazyancestors...
r18091
def __contains__(self, target):
"""Test whether target is an ancestor of self._initrevs."""
# Trying to do both __iter__ and __contains__ using the same visit
# heap and seen set is complex enough that it slows down both. Keep
# them separate.
seen = self._containsseen
if target in seen:
return True
Yuya Nishihara
py3: make 'None in lazyancestors' not crash...
r38614 # Only integer target is valid, but some callers expect 'None in self'
# to be False. So we explicitly allow it.
if target is None:
return False
Siddharth Agarwal
ancestor: add lazy membership testing to lazyancestors...
r18091
parentrevs = self._parentrevs
visit = self._containsvisit
stoprev = self._stoprev
heappop = heapq.heappop
heappush = heapq.heappush
Pierre-Yves David
ancestors: prefetch method outside of the loop...
r25639 see = seen.add
Siddharth Agarwal
ancestor: add lazy membership testing to lazyancestors...
r18091
targetseen = False
while visit and -visit[0] > target and not targetseen:
for parent in parentrevs(-heappop(visit)):
if parent < stoprev or parent in seen:
continue
# We need to make sure we push all parents into the heap so
# that we leave it in a consistent state for future calls.
heappush(visit, -parent)
Pierre-Yves David
ancestors: prefetch method outside of the loop...
r25639 see(parent)
Siddharth Agarwal
ancestor: add lazy membership testing to lazyancestors...
r18091 if parent == target:
targetseen = True
return targetseen